######################### # # R code for the study: # Naturally-diverse airborne environmental microbial exposures modulate the gut microbiome # and may provide anxiolytic benefits in mice # # Craig Liddicoat, Harrison Sydnor, Christian Cando-Dumancela, Romy Dresken, Jiajun Liu, Nicholas J.C. Gellie, # Jacob G. Mills, Jennifer M. Young, Laura S. Weyrich, Mark R. Hutchinson, Philip Weinstein, Martin F. Breed # ######################### .libPaths() # "C:/R/R-3.5.1/library" R.Version() # "R version 3.5.1 (2018-07-02)" citation() # R Core Team (2018). R: A language and environment for statistical computing. R Foundation for Statistical # Computing, Vienna, Austria. URL https://www.R-project.org/ library(readxl); packageVersion("readxl") # '1.1.0' library(readr); packageVersion("readr") # '1.1.1' library(plyr); packageVersion("plyr") # '1.8.4' library(dplyr); packageVersion("dplyr") # '0.7.6' library(vegan);packageVersion("vegan") # '2.5.2' library(phyloseq); packageVersion("phyloseq") # '1.24.0' library(ggplot2); packageVersion("ggplot2") # '3.0.0' library(ggrepel); packageVersion("ggrepel") # '0.8.0' library(grid); packageVersion("grid") # '3.5.1' library(reshape2); packageVersion("reshape2") # '1.4.3' library(ggsignif); packageVersion("ggsignif") # '0.4.0' library(moments); packageVersion("moments") # '0.14' library(doParallel); packageVersion("doParallel") # '1.0.11' library(tidyr); packageVersion("tidyr") # '0.8.1' library(DESeq2); packageVersion("DESeq2") # '1.20.0' library(FSA); packageVersion("FSA") # '0.8.20' library(rcompanion); packageVersion("rcompanion") # '1.13.2' library(gridExtra); packageVersion("gridExtra") # '2.3' library(VennDiagram); packageVersion("VennDiagram") # '1.6.20' library(eulerr); packageVersion("eulerr") # '5.0.0' library(gplots); packageVersion("gplots") # '3.0.1' library(decontam); packageVersion("decontam") # '1.1.2' library(rBLAST); packageVersion("rBLAST") # '0.99.1' loads Biostrings, masks strsplit() from base package library(stringr); packageVersion("stringr") # '1.3.1' library(seqinr); packageVersion("seqinr") # '3.4.5' workdir <- "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling" setwd(workdir) getwd() # datadir <- "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling/data" datadir par.default <- par() ######################### #### Animal details #------------------------ mice_info <- read_excel(path= paste0(datadir,"/","Animal-details-main-study.xlsx"), sheet=1, range="A1:N55", col_names = TRUE) mice_info <- as.data.frame(mice_info) str(mice_info) # change char to factor mice_info[ , c("mouseID", "Sex", "Litter", "Cage Name", "Treatment", "Ear notch pos", "Rack_position" )] <- lapply(mice_info[ , c("mouseID", "Sex", "Litter", "Cage Name", "Treatment", "Ear notch pos", "Rack_position" )], FUN = factor) str(mice_info) # 'data.frame': 54 obs. of 14 variables: # $ mouseID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 37 38 39 19 20 21 22 ... # $ Animal no : num 1 2 3 4 5 6 7 8 9 10 ... # $ Sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 2 2 2 2 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 3 11 2 3 11 11 12 13 12 ... # $ Date of Birth : POSIXct, format: "2018-08-02" "2018-08-04" "2018-08-02" "2018-08-02" ... # $ Cage no : num 1 1 1 2 2 2 3 3 3 4 ... # $ Cage Name : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 13 13 13 7 7 7 8 ... # $ Treatment : Factor w/ 3 levels "Control","High",..: 1 1 1 3 3 3 2 2 2 2 ... # $ Rack : num 1 1 1 1 1 1 1 1 1 2 ... # $ Ear notch ID : num 1 2 3 1 2 3 1 2 3 1 ... # $ Ear notch pos : Factor w/ 3 levels "(-) No Notch",..: 1 3 2 1 3 2 1 3 2 1 ... # $ Age_received_20_Aug : num 2.6 2.3 2.6 2.6 2.3 2.6 2.6 2.6 2.4 2.6 ... # $ Age_first_soil_exposure_27_Aug: num 3.6 3.3 3.6 3.6 3.3 3.6 3.6 3.6 3.4 3.6 ... # $ Rack_position : Factor w/ 3 levels "low","middle",..: 3 3 3 2 2 2 1 1 1 3 ... table(mice_info$Litter) # Blue 20 Blue 24 Blue 27 Green 13 Green 14 Green 18 Green 21 Green 30 Orange 15 Orange 19 Pink 23 Pink 24 # 5 2 2 4 2 4 1 2 7 5 4 6 # Purple 26 Red 22 Red 23 # 4 2 4 ## read-in mouse weights mice_wt <- read_excel(path= paste0(datadir,"/","Weight-record-sheet-main-study.xlsx"), sheet=1, range="E6:X60", col_names = TRUE) mice_wt <- as.data.frame(mice_wt) str(mice_wt) # remove NA cols sel.na <- which(!complete.cases(t(mice_wt))) mice_wt <- mice_wt[ ,-sel.na] # set mouseID as row name row.names(mice_wt) <- mice_wt[,1] mice_wt <- mice_wt[,-1] temp <- mice_wt # experiment days days <- as.numeric(names(mice_wt)) length(days) # 16 # days of treatment days.treatment <- days - 43339 days.treatment # -6 -5 1 4 8 11 15 17 22 25 29 32 36 39 43 46 # = Treatment days names(mice_wt) # [1] "43333" "43334" "43340" "43343" "43347" "43350" "43354" "43356" "43361" "43364" "43368" "43371" "43375" "43378" "43382" # [16] "43385" ## remove weight of measuring tubs ## read-in tub weights tub_wt <- read_excel(path= paste0(datadir,"/","Weight-record-sheet-main-study.xlsx"), sheet=1, range="AE7:AF16", col_names = TRUE) tub_wt <- as.data.frame(tub_wt) str(tub_wt) # 'data.frame': 9 obs. of 2 variables: # $ tub : chr "C1" "C2" "C3" "L1" ... # $ mass_g: num 34.2 34.8 34.5 34.5 33.9 34.1 33.9 33.4 33.6 row.names(tub_wt) <- tub_wt$tub mice_wt.fix <- mice_wt mice_wt.fix$tub <- paste0(substring(row.names(mice_wt.fix),first=1,last=1) , substring(row.names(mice_wt.fix),first=nchar(row.names(mice_wt.fix)),last=nchar(row.names(mice_wt.fix))) ) ## subtract tub weights from recorded weights names(mice_wt.fix) # [1] "43333" "43334" "43340" "43343" "43347" "43350" "43354" "43356" "43361" "43364" "43368" "43371" "43375" "43378" "43382" # [16] "43385" "tub" for (d in 1:(length(names(mice_wt.fix))-1) ) { #d<-1 print(paste0("calculating for day: ",names(mice_wt.fix)[d])) for (m in 1:length(row.names(mice_wt.fix))) { #m<-1 print(paste0("calculating for mouse no: ",m,", ID = ",row.names(mice_wt.fix)[m])) this_tub_wt <- tub_wt[ mice_wt.fix$tub[m] , "mass_g" ] mice_wt.fix[m,d] <- mice_wt.fix[m,d] - this_tub_wt print(paste0("tub mass for ",mice_wt.fix$tub[m]," is ",tub_wt[ mice_wt.fix$tub[m] , "mass_g" ])) print(paste0("adjusted mouse weight is ",mice_wt.fix[m,d])) print(paste0("------------------------")) } } ## remove last column $tub mice_wt.fix <- mice_wt.fix[ , -dim(mice_wt.fix)[2] ] ## transform and prepare for ggplot wts <- t(mice_wt.fix) melt.wts <- melt(wts, id.vars=names(wts)) head(melt.wts) # Var1 Var2 value # 1 43333 C1m1 12.3 # 2 43334 C1m1 12.8 # 3 43340 C1m1 16.0 # 4 43343 C1m1 16.9 # 5 43347 C1m1 19.0 # 6 43350 C1m1 18.4 names(melt.wts) # "Var1" "Var2" "value" names(melt.wts) <- c( "Day", "mouseID", "weight_g") melt.wts$Treatment <- substring( melt.wts$mouseID, 1, 1 ) melt.wts$Treatment <- factor(melt.wts$Treatment, levels = c("C","L","H"), labels = c("Control","Low","High")) head(melt.wts) # Day mouseID weight_g Treatment # 1 43333 C1m1 12.3 Control # 2 43334 C1m1 12.8 Control # 3 43340 C1m1 16.0 Control # 4 43343 C1m1 16.9 Control # 5 43347 C1m1 19.0 Control # 6 43350 C1m1 18.4 Control ## join to other mouse info: Sex, Litter, Age received, Rack position dat_wts <- melt.wts names(dat_wts) names(mice_info) dat_wts <- merge(x = dat_wts, y = mice_info[ ,c("mouseID","Sex","Litter","Cage Name","Date of Birth","Rack_position")], by = "mouseID", all.x=TRUE) sel <- which(names(dat_wts)=="Cage Name") names(dat_wts)[sel] <- "Cage_Name" sel <- which(names(dat_wts)=="Date of Birth") names(dat_wts)[sel] <- "Date_of_Birth" names(dat_wts) # [1] "mouseID" "Day" "weight_g" "Treatment" "Sex" "Litter" "Cage_Name" # [8] "Date_of_Birth" "Rack_position" ## re-order dat_wts <- dat_wts[ ,c( "Day", "mouseID", "Treatment", "Cage_Name", "Sex", "Litter", "Date_of_Birth", "Rack_position", "weight_g" )] dat_wts$Treatment <- factor(dat_wts$Treatment, levels = c("Control","Low","High"), ordered = TRUE) dat_wts$Rack_position <- factor(dat_wts$Rack_position, levels = c("low", "middle", "top"), ordered = TRUE) dat_wts$Cage_Name <- factor(dat_wts$Cage_Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) dat_wts$Sex <- factor(dat_wts$Sex, levels = c("female", "male"), labels = c("Female","Male")) str(dat_wts) # 'data.frame': 864 obs. of 9 variables: # $ Day : int 43333 43334 43340 43343 43347 43350 43354 43356 43361 43364 ... # $ mouseID : Factor w/ 54 levels "C1m1","C1m2",..: 1 1 1 1 1 1 1 1 1 1 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Cage_Name : Ord.factor w/ 18 levels "C1"<"C2"<"C3"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 2 2 2 2 2 2 2 2 2 ... # $ Date_of_Birth: POSIXct, format: "2018-08-02" "2018-08-02" "2018-08-02" "2018-08-02" ... # $ Rack_position: Ord.factor w/ 3 levels "low"<"middle"<..: 3 3 3 3 3 3 3 3 3 3 ... # $ weight_g : num 12.3 12.8 16 16.9 19 18.4 19.3 19.4 21 21.1 ... p <- ggplot(dat_wts, aes(x=Day,y=weight_g,color=Treatment)) + geom_line(aes(group = mouseID)) p cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue ## 18 shapes for cages shapes <- c(1:18) ## 15 shapes for litters #shapes <- c(1:15) p <- ggplot(dat_wts, aes(x=Day,y=weight_g,color=Treatment, shape=Cage_Name)) + geom_line(aes(group = mouseID)) p p$data$Cage_Name pp <- p + theme_bw() + facet_grid(rows=vars(Sex),cols=vars(Treatment)) + geom_point(aes(shape = Cage_Name), data=dat_wts) + #geom_point(shape = p$data$`Cage Name`) + #geom_point(shape = p$data$Litter) + scale_colour_manual(values = cols, guide = FALSE) + scale_shape_manual(values = shapes, name = "Enclosure") + labs(x = "Day", y = "Weight (g)") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-5, "pt"), axis.text.x = element_text(margin=margin(t = 10,r = 5,b = 2,l = 5,"pt")), axis.text.y = element_text(margin=margin(t = 5,r = 10,b = 10,l = 10,"pt")), #legend.position = "none", strip.background = element_rect(fill="white") ) # + #guides( shape = guide_legend(ncol = 2) ) pp ### add age to dataframe 'dat_wts' head( dat_wts$Date_of_Birth) # [1] "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" class( dat_wts$Date_of_Birth ) # "POSIXct" "POSIXt" head( as.Date(dat_wts$Day, origin = "1899-12-30") ) # "2018-08-21" "2018-08-22" "2018-08-28" "2018-08-31" "2018-09-04" "2018-09-07" head( dat_wts$Date_of_Birth ) # "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" "2018-08-02 UTC" head( as.Date(dat_wts$Date_of_Birth) ) # "2018-08-02" "2018-08-02" "2018-08-02" "2018-08-02" "2018-08-02" "2018-08-02" head( as.Date(dat_wts$Day, origin = "1899-12-30") ) - head( as.Date(dat_wts$Date_of_Birth) ) # Time differences in days # [1] 19 20 26 29 33 36 dat_wts$Age_days <- as.Date(dat_wts$Day, origin = "1899-12-30") - as.Date( dat_wts$Date_of_Birth ) dat_wts$Age_days <- as.numeric(dat_wts$Age_days) dat_wts$Age_weeks <- dat_wts$Age_days/7 # also add experimental day. Noting that Treatments commenced on Day 7 dat_wts$Exp_days <- as.Date(dat_wts$Day, origin = "1899-12-30") - as.Date( "2018-08-20" ) dat_wts$Exp_days <- as.numeric(dat_wts$Exp_days) dat_wts$Exp_days unique(dat_wts$Exp_days) # [1] 1 2 8 11 15 18 22 24 29 32 36 39 43 46 50 53 names(dat_wts) dat <- dat_wts[ , c("weight_g", "Exp_days", "Age_days", "mouseID", "Cage_Name", "Treatment", "Sex", "Litter", "Rack_position")] str(dat) # 'data.frame': 864 obs. of 9 variables: # $ weight_g : num 12.3 12.8 16 16.9 19 18.4 19.3 19.4 21 21.1 ... # $ Exp_days : num 1 2 8 11 15 18 22 24 29 32 ... # $ Age_days : num 19 20 26 29 33 36 40 42 47 50 ... # $ mouseID : Factor w/ 54 levels "C1m1","C1m2",..: 1 1 1 1 1 1 1 1 1 1 ... # $ Cage_Name : Ord.factor w/ 18 levels "C1"<"C2"<"C3"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 2 2 2 2 2 2 2 2 2 ... # $ Rack_position: Ord.factor w/ 3 levels "low"<"middle"<..: 3 3 3 3 3 3 3 3 3 3 ... cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue ## 18 shapes for cages shapes <- c(1:18) ## 15 shapes for litters #shapes <- c(1:15) p <- ggplot(dat_wts, aes(x=Age_days,y=weight_g,color=Treatment, shape=Cage_Name)) + geom_line(aes(group = mouseID)) p p$data$Cage_Name pp <- p + theme_bw() + facet_grid(rows=vars(Sex),cols=vars(Treatment)) + geom_point(aes(shape = Cage_Name), data=dat_wts) + #geom_point(shape = p$data$`Cage Name`) + #geom_point(shape = p$data$Litter) + scale_colour_manual(values = cols, guide = FALSE) + scale_shape_manual(values = shapes, name = "Enclosure") + labs(x = "Age (days)", y = "Weight (g)") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-5, "pt"), axis.text.x = element_text(margin=margin(t = 10,r = 5,b = 2,l = 5,"pt")), axis.text.y = element_text(margin=margin(t = 5,r = 10,b = 10,l = 10,"pt")), #legend.position = "none", strip.background = element_rect(fill="white") ) # + #guides( shape = guide_legend(ncol = 2) ) pp ggsave(plot=pp, filename = paste0("plots/","growth-curves-by-Treatment-and-Sex.tiff"), width = 16, height = 12, units = "cm", dpi = 600, compression = "lzw") #------------------------ #### Exploratory model of mice growth curves #------------------------ # ... from above str(dat) # 'data.frame': 864 obs. of 9 variables: # $ weight_g : num 12.3 12.8 16 16.9 19 18.4 19.3 19.4 21 21.1 ... # $ Exp_days : num 1 2 8 11 15 18 22 24 29 32 ... # $ Age_days : num 19 20 26 29 33 36 40 42 47 50 ... # $ mouseID : Factor w/ 54 levels "C1m1","C1m2",..: 1 1 1 1 1 1 1 1 1 1 ... # $ Cage_Name : Ord.factor w/ 18 levels "C1"<"C2"<"C3"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 2 2 2 2 2 2 2 2 2 ... # $ Rack_position: Ord.factor w/ 3 levels "low"<"middle"<..: 3 3 3 3 3 3 3 3 3 3 ... # library(caret);packageVersion("caret") # '6.0.80' # library(randomForest);packageVersion("randomForest") # '4.6.14' # library(boot);packageVersion("boot") # '1.3.20' ## for exploratory model, strip the pre-exposure/pre-treatment days out of data record dat.mod <- dat unique(dat.mod$Exp_days) # [1] 1 2 8 11 15 18 22 24 29 32 36 39 43 46 50 53 sel <- which(dat.mod$Exp_days %in% c(1,2)) # qty 108 dat.mod <- dat.mod[-sel, ] dim(dat.mod) # 756 9 names(dat.mod) # [1] "weight_g" "Exp_days" "Age_days" "mouseID" "Cage_Name" "Treatment" "Sex" # [8] "Litter" "Rack_position" ## fit Random Forest regression model and check variable importance plot - to assess importance of treatment? library(randomForest);packageVersion("randomForest") # '4.6.14' ## don't include factor mouseID - as modelling tool will not handle categorical ## predictor with such a large number of categories dat1 <- dat.mod[ , c( "weight_g", "Age_days", "Cage_Name", "Treatment", "Sex", "Litter", "Rack_position")] names(dat1) #[1] "weight_g" "Age_days" "Cage_Name" "Treatment" "Sex" "Litter" "Rack_position" model1 <- randomForest(weight_g ~ ., data = dat1, importance = TRUE) model1 # Call: # randomForest(formula = weight_g ~ ., data = dat1, importance = TRUE) # Type of random forest: regression # Number of trees: 500 # No. of variables tried at each split: 2 # # Mean of squared residuals: 0.5773832 # % Var explained: 93.29 varImpPlot(model1) varImpPlot(model1,type=2) # Treatment has only marginal importance, lowest compared to others importanceOrder=order(-model1$importance) names=rownames(model1$importance)[importanceOrder] names<-na.omit(names) par(mfrow=c(2, 3),mar=c(4,3,2,1)+0.1) for (name in names) { partialPlot(model1, dat1, eval(name), main="", xlab=name) } par(par.default) dat1$Treatment # Levels: Control < Low < High dat1$Rack_position # Levels: low < middle < top grDevices::tiff(file=paste0("plots/","RF-PartialPlots-all-data-in-vFINAL.tif"), width = 16, height = 14, units = "cm", res = 600, compression = "lzw") par(mfrow=c(2, 3),mar=c(4,3,2,1)+0.1) for (name in names) { partialPlot(model1, dat1, eval(name), main="", xlab=name) } dev.off() par(par.default) # Mouse weights: # - increase with age # - are higher in males # - vary randomly with litters # - vary randomly with cages # - decrease with increasing height on the rack (low < middle < top) # - increase with Treatment biodiversity (Control < Low < High) # (Higher effect/important at top, towards marginal effect/importance for lower variables) ## detach library to avoid conflict with margin() calls in ggplot detach("package:randomForest", unload=TRUE) #------------------------ #### Behaviour testing #------------------------ ### Open Field of.post <- read_excel(path= paste0(datadir,"/","Open-field-post-exposure.xlsx"), sheet=1, range="A1:AE55", col_names = TRUE) of.post <- as.data.frame(of.post) str(of.post) of.post$Treatment <- factor(of.post$Treatment, levels = c("Control", "Low" , "High"), ordered = TRUE ) # change char to factor of.post[ , c( "Code", "Mouse", "ID", "Sex", "Litter" )] <- lapply(of.post[ , c("Code", "Mouse", "ID", "Sex", "Litter" )], FUN = factor) names(mice_info) # [1] "mouseID" "Animal no" "Sex" # [4] "Litter" "Date of Birth" "Cage no" # [7] "Cage Name" "Treatment" "Rack" # [10] "Ear notch ID" "Ear notch pos" "Age_received_20_Aug" # [13] "Age_first_soil_exposure_27_Aug" "Rack_position" of.post <- merge(x = of.post, y = mice_info[ ,c( "mouseID", "Date of Birth", "Cage no", "Cage Name", "Rack", "Age_received_20_Aug", "Age_first_soil_exposure_27_Aug", "Rack_position")], by.x = "ID", by.y = "mouseID", all.x = TRUE ) str(of.post) # 'data.frame': 54 obs. of 38 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 4 5 6 7 8 9 10 ... # $ Test_no : num 1 2 3 13 14 15 25 26 27 34 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Code : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Mouse : Factor w/ 3 levels "m1","m2","m3": 1 2 3 1 2 3 1 2 3 1 ... # $ Sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 2 2 2 1 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 3 11 11 12 14 9 10 1 15 ... # $ Stage : chr "First stage" "First stage" "First stage" "First stage" ... # $ Trial : num 1 1 1 1 1 1 1 1 1 1 ... # $ Apparatus : chr "Open field 1" "Open field 2" "Open field 3" "Open field 1" ... # $ Duration : num 600 600 600 600 600 600 600 600 600 600 ... # $ Distance : num 15.87 23.61 16.8 7.71 5.25 ... # $ Dist_Corrected : num 15.43 22.95 16.33 7.5 5.11 ... # $ Corners_entries : num 72 73 56 33 60 50 55 35 82 70 ... # $ Corners_time : num 352 174 162 533 182 ... # $ Corners_distance : num 6.68 4.04 3.06 3.77 1.34 ... # $ Corners_dist_Corrected : num 6.5 3.93 2.98 3.66 1.3 ... # $ Corners_head_time : num 254.4 145.1 131.6 462.9 80.6 ... # $ Corners_head_entries : num 103 113 89 81 57 74 65 72 89 107 ... # $ Walls_entries : num 74 148 97 38 118 86 82 72 106 74 ... # $ Walls_time : num 235 296.8 366.5 56.4 255.8 ... # $ Walls_distance : num 8.91 12.31 10.26 3.5 2.65 ... # $ Walls_dist_Corrected : num 8.66 11.97 9.97 3.41 2.58 ... # $ Walls_head_time : num 329 283 344 119 231 ... # $ Walls_head_entries : num 111 210 162 97 163 111 171 164 134 135 ... # $ Centre_entries : num 4 79 43 6 60 36 26 37 23 4 ... # $ Centre_time : num 13.2 128.8 71.6 10.4 162.5 ... # $ Centre_distance : num 0.484 7.744 3.599 0.441 1.966 ... # $ Centre_dist_Corrected : num 0.471 7.529 3.499 0.429 1.911 ... # $ Centre_head_time : num 16.7 157.3 123.9 17.9 288.4 ... # $ Centre_head_entries : num 9 107 86 19 107 43 113 96 50 29 ... # $ Date of Birth : POSIXct, format: "2018-08-02" "2018-08-04" "2018-08-02" ... # $ Cage no : num 1 1 1 5 5 5 9 9 9 12 ... # $ Cage Name : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Rack : num 1 1 1 2 2 2 3 3 3 4 ... # $ Age_received_20_Aug : num 2.6 2.3 2.6 2.6 2.6 3 4.1 4.1 4.1 3.7 ... # $ Age_first_soil_exposure_27_Aug: num 3.6 3.3 3.6 3.6 3.6 4 5.1 5.1 5.1 4.7 ... # $ Rack_position : Factor w/ 3 levels "low","middle",..: 3 3 3 2 2 2 1 1 1 1 ... ## Note that 'Corrected' distances are (35/36)*distances reported by the ANY-maze software. ## This is due to setting the electronic template as measuring 36 cm across to facilitate mapping of 4 x 4 ## equal zones in the Open Field ANY-maze template. However, the apparatus actually measured 35 cm across. ## Therefore software-reported distances require correction by a factor of x (35/36). p <- ggplot(data = of.post, aes(x = Treatment, y = Centre_time)) + geom_boxplot() + facet_grid( ~ Sex) + theme_bw() p getwd() # "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling" ggsave(plot=p, filename = paste0("plots/","boxplots-Open-field--Centre-time--by-sex.tiff"), width = 12, height = 6, units = "cm", dpi = 600, compression = "lzw") ## isolate data for females names(of.post) # [1] "ID" "Test_no" "Treatment" # [4] "Code" "Mouse" "Sex" # [7] "Litter" "Stage" "Trial" # [10] "Apparatus" "Duration" "Distance" # [13] "Dist_Corrected" "Corners_entries" "Corners_time" # [16] "Corners_distance" "Corners_dist_Corrected" "Corners_head_time" # [19] "Corners_head_entries" "Walls_entries" "Walls_time" # [22] "Walls_distance" "Walls_dist_Corrected" "Walls_head_time" # [25] "Walls_head_entries" "Centre_entries" "Centre_time" # [28] "Centre_distance" "Centre_dist_Corrected" "Centre_head_time" # [31] "Centre_head_entries" "Date of Birth" "Cage no" # [34] "Cage Name" "Rack" "Age_received_20_Aug" # [37] "Age_first_soil_exposure_27_Aug" "Rack_position" sel <- which(of.post$Sex == "female") of_ct_fem <- of.post[sel, c("ID", "Treatment", "Centre_time")] ## Kruskal-Wallis multiple comparison test ## with post-hoc Dunn test str(of_ct_fem) # 'data.frame': 27 obs. of 3 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 10.4 162.5 88.7 12.1 3.3 ... of_ct_fem$group <- as.character(of_ct_fem$Treatment) str(of_ct_fem) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 10.4 162.5 88.7 12.1 3.3 ... # $ group : chr "Control" "Control" "Control" "Control" ... p <- ggplot(data = of_ct_fem, aes(x = Treatment, y = Centre_time)) + geom_boxplot() + theme_bw() p ## Exclude outliers from significance testing - using default outlier classification ## outlier is defined as values outside 1.5 times the interquartile range above the upper quartile or below the lower quartile. bp <- boxplot(of_ct_fem$Centre_time ~ of_ct_fem$group) bp$out # 162.5 88.7 sel <- which(of_ct_fem$Centre_time %in% bp$out) of_ct_fem$Centre_time[sel] # 162.5 88.7 of_ct_fem[sel, ] # ID Treatment Centre_time group # 5 C2m2 Control 162.5 Control # 6 C2m3 Control 88.7 Control # exclude outliers of_ct_fem__ex_out <- of_ct_fem[-sel, ] str(of_ct_fem__ex_out) # 'data.frame': 25 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 10 11 12 13 14 15 25 26 27 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 3 3 3 ... # $ Centre_time: num 10.4 12.1 3.3 34.8 10.6 27.1 25.5 33.5 28.5 61.4 ... # $ group : chr "Control" "Control" "Control" "Control" ... table(of_ct_fem__ex_out$Treatment) # Control Low High # 7 9 9 # Kruskal-Wallis test kt <- kruskal.test( Centre_time ~ factor(group), of_ct_fem__ex_out) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: Centre_time by factor(group) # Kruskal-Wallis chi-squared = 8.0832, df = 2, p-value = 0.01757 ## Dunn Test uses factor vector or non-numeric vector that can be coerced to a factor vector pt <- dunnTest( Centre_time ~ group, data= of_ct_fem__ex_out, method = "bonferroni" ) # Warning message: # group was coerced to a factor. pt # Dunn (1964) Kruskal-Wallis multiple comparison # p-values adjusted with the Bonferroni method. # # Comparison Z P.unadj P.adj # 1 Control - High -2.7303879 0.006325984 0.01897795 # 2 Control - Low -2.1911577 0.028440381 0.08532114 # 3 High - Low 0.5764614 0.564303384 1.00000000 pt$dtres pt <- pt$res pt cldList(comparison = pt$Comparison, p.value = pt$P.adj, threshold = 0.05) # Group Letter MonoLetter # 1 Control a a # 2 High b b # 3 Low ab ab ## Include significance letters in plot cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue set.seed(4321) p <- ggplot(data = of_ct_fem, aes(x = Treatment, y = Centre_time)) + geom_boxplot() + #geom_point(aes(colour = Treatment)) + geom_jitter(data=of_ct_fem__ex_out, aes(colour = Treatment), width=0.3, height=0) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Time in centre (s)", x=NULL) + annotate(geom="text", x= 1.1, y= 50, label = "a", hjust=0, vjust=0, size = 3) + annotate(geom="text", x= 2.1, y= 100, label = "ab", hjust=0, vjust=0, size = 3) + annotate(geom="text", x= 3.1, y= 70, label = "b", hjust=0, vjust=0, size = 3) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 6,r = 5,b = 2,l = 5,"pt"), size = rel(0.85)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.75)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)) ) p ggsave(plot=p, filename = paste0("plots/","Open-Field-Centre-Time-Female-Only-with-signif-vFINAL.tiff"), width = 5.5, height = 4, units = "cm", dpi = 600, compression = "lzw") # # # # # # # # # ## isolate males and subset data names(of.post) sel <- which(of.post$Sex == "male") of_ct_male <- of.post[sel, c("ID", "Treatment", "Centre_time")] of_ct_male$group <- as.character(of_ct_male$Treatment) str(of_ct_male) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 7 8 9 16 17 18 19 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 13.2 128.8 71.6 46.9 208.3 ... # $ group : chr "Control" "Control" "Control" "Control" ... ## Exclude boxplot outliers from Kruskal Wallis test bp <- boxplot(of_ct_male$Centre_time ~ of_ct_male$group) bp$out # 208.3 289.9 279.0 sel <- which(of_ct_male$Centre_time %in% bp$out) of_ct_male$Centre_time[sel] # 208.3 289.9 279.0 of_ct_male[sel, ] # ID Treatment Centre_time group # 8 C3m2 Control 208.3 Control # 23 H2m2 High 289.9 High # 33 H5m3 High 279.0 High # exclude outliers of_ct_male__ex_out <- of_ct_male[-sel, ] str(of_ct_male__ex_out) # 'data.frame': 24 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 7 9 16 17 18 19 20 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 3 3 ... # $ Centre_time: num 13.2 128.8 71.6 46.9 68.2 ... # $ group : chr "Control" "Control" "Control" "Control" ... # Kruskal-Wallis test kt <- kruskal.test( Centre_time ~ factor(group), of_ct_male__ex_out) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: Centre_time by factor(group) # Kruskal-Wallis chi-squared = 2.4427, df = 2, p-value = 0.2948 ## i.e. no significant difference # # # # # # # # # # # # # # # # # # ## Plot Open Field - Time in Centre - Female & Male ## plot using facet_grid # don't colour outliers bp <- boxplot(of.post$Centre_time ~ of.post$Treatment + of.post$Sex) bp$out # 162.5 88.7 208.3 289.9 279.0 sel <- which(of.post$Centre_time %in% bp$out) of.post$Centre_time[sel] # 162.5 88.7 208.3 289.9 279.0 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue of.post.COPY <- of.post of.post.COPY$Sex <- factor(of.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) set.seed(1234) p <- ggplot(data=of.post.COPY, aes(x = Treatment, y = Centre_time)) + geom_boxplot(outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = of.post.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Time in centre (s)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p # add significance letters grid.text(label = "a", x = unit(0.25, "npc") , y = unit(0.35,"npc"), gp=gpar(fontsize=10.5) ) grid.text(label = "ab", x = unit(0.39, "npc") , y = unit(0.45,"npc"), gp=gpar(fontsize=10.5) ) grid.text(label = "b", x = unit(0.51, "npc") , y = unit(0.40,"npc"), gp=gpar(fontsize=10.5) ) dev.print(tiff, file = paste0("plots/","Open-Field-Time-in-Centre-Results-Female-and-Male-vFINAL.tiff"), width = 8.7, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # # # # # # # # # # # # # # # # # # # ### Compare pre- to post- Open Fields results of.pre <- read_excel(path= paste0(datadir,"/","Open-field-pre-exposure.xlsx"), sheet=1, range="A1:AE55", col_names = TRUE) of.pre <- as.data.frame(of.pre) str(of.pre) of.pre$Treatment <- factor(of.pre$Treatment, levels = c("Control", "Low" , "High"), ordered = TRUE ) # change char to factor of.pre[ , c( "Code", "Mouse", "ID", "Sex", "Litter" )] <- lapply(of.pre[ , c("Code", "Mouse", "ID", "Sex", "Litter" )], FUN = factor) names(mice_info) # [1] "mouseID" "Animal no" "Sex" # [4] "Litter" "Date of Birth" "Cage no" # [7] "Cage Name" "Treatment" "Rack" # [10] "Ear notch ID" "Ear notch pos" "Age_received_20_Aug" # [13] "Age_first_soil_exposure_27_Aug" "Rack_position" of.pre <- merge(x = of.pre, y = mice_info[ ,c( "mouseID", "Date of Birth", "Cage no", "Cage Name", "Rack", "Age_received_20_Aug", "Age_first_soil_exposure_27_Aug", "Rack_position")], by.x = "ID", by.y = "mouseID", all.x = TRUE ) str(of.pre) # 'data.frame': 54 obs. of 38 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 4 5 6 7 8 9 10 ... # $ Test_no : num 1 2 3 13 14 15 25 26 27 34 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Code : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Mouse : Factor w/ 3 levels "m1","m2","m3": 1 2 3 1 2 3 1 2 3 1 ... # $ Sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 2 2 2 1 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 3 11 11 12 14 9 10 1 15 ... # $ Stage : chr "First stage" "First stage" "First stage" "First stage" ... # $ Trial : num 1 1 1 1 1 1 1 1 1 1 ... # $ Apparatus : chr "Open field 1" "Open field 3" "Open field 2" "Open field 1" ... # $ Duration : num 600 600 600 600 600 600 600 600 600 600 ... # $ Distance : num 29.02 22.24 8.25 21.07 14.46 ... # $ Dist_Corrected : num 28.21 21.63 8.02 20.49 14.06 ... # $ Corners_entries : num 88 53 33 66 63 66 93 88 62 80 ... # $ Corners_time : num 192 161 487 416 387 ... # $ Corners_distance : num 6.75 4.36 3.81 8.11 6.36 ... # $ Corners_dist_Corrected : num 6.56 4.24 3.7 7.89 6.18 ... # $ Corners_head time : num 174 154 424 378 309 ... # $ Corners_head entries : num 104 58 63 101 81 74 103 117 75 91 ... # $ Walls_entries : num 138 97 32 79 66 117 149 135 132 125 ... # $ Walls_time : num 339 342 113 159 206 ... # $ Walls_distance : num 16.19 10.96 4.5 10.94 7.82 ... # $ Walls_dist_Corrected : num 15.74 10.65 4.37 10.64 7.6 ... # $ Walls_head time : num 339 323 168 193 259 ... # $ Walls_head entries : num 173 96 57 118 85 135 208 213 149 163 ... # $ Centre_entries : num 54 49 0 14 4 55 62 53 76 52 ... # $ Centre_time : num 68.9 96.3 0 24.8 6.8 ... # $ Centre_distance : num 6.3 7.088 0 2.075 0.283 ... # $ Centre_dist_Corrected : num 6.125 6.891 0 2.017 0.275 ... # $ Centre_head time : num 86.8 122.7 0 29.1 8.9 ... # $ Centre_head entries : num 74 49 0 20 11 76 117 107 90 82 ... # $ Date of Birth : POSIXct, format: "2018-08-02" "2018-08-04" "2018-08-02" "2018-08-02" ... # $ Cage no : num 1 1 1 5 5 5 9 9 9 12 ... # $ Cage Name : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Rack : num 1 1 1 2 2 2 3 3 3 4 ... # $ Age_received_20_Aug : num 2.6 2.3 2.6 2.6 2.6 3 4.1 4.1 4.1 3.7 ... # $ Age_first_soil_exposure_27_Aug: num 3.6 3.3 3.6 3.6 3.6 4 5.1 5.1 5.1 4.7 ... # $ Rack_position : Factor w/ 3 levels "low","middle",..: 3 3 3 2 2 2 1 1 1 1 ... # Note that 'Corrected' distances are (35/36)*distances reported by the ANY-maze software. # This is due to setting the electronic template as measuring 36 cm across to facilitate mapping of 4 x 4 # equal zones in the Open Field ANY-maze template. However, the apparatus actually measured 35 cm across. # Therefore software-reported distances require correction by a factor of x (35/36). p <- ggplot(data = of.pre, aes(x = Treatment, y = Centre_time)) + geom_boxplot() + facet_grid( ~ Sex) + theme_bw() p ## isolate data for females and males respectively - to exclude outliers and test for difference names(of.pre) # [1] "ID" "Test_no" "Treatment" # [4] "Code" "Mouse" "Sex" # [7] "Litter" "Stage" "Trial" # [10] "Apparatus" "Duration" "Distance" # [13] "Dist_Corrected" "Corners_entries" "Corners_time" # [16] "Corners_distance" "Corners_dist_Corrected" "Corners_head time" # [19] "Corners_head entries" "Walls_entries" "Walls_time" # [22] "Walls_distance" "Walls_dist_Corrected" "Walls_head time" # [25] "Walls_head entries" "Centre_entries" "Centre_time" # [28] "Centre_distance" "Centre_dist_Corrected" "Centre_head time" # [31] "Centre_head entries" "Date of Birth" "Cage no" # [34] "Cage Name" "Rack" "Age_received_20_Aug" # [37] "Age_first_soil_exposure_27_Aug" "Rack_position" sel <- which(of.pre$Sex == "female") of_ct_fem_pre <- of.pre[sel, c("ID", "Treatment", "Centre_time")] ## Kruskal-Wallis multiple comparison test ## with post-hoc Dunn test str(of_ct_fem_pre) # 'data.frame': 27 obs. of 3 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 24.8 6.8 87.9 73.5 2.8 ... of_ct_fem_pre$group <- as.character(of_ct_fem_pre$Treatment) str(of_ct_fem_pre) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 24.8 6.8 87.9 73.5 2.8 ... # $ group : chr "Control" "Control" "Control" "Control" ... p <- ggplot(data = of_ct_fem_pre, aes(x = Treatment, y = Centre_time)) + geom_boxplot() + theme_bw() p ## Exclude outliers from significance testing - using default outlier classification ## outlier is defined as values outside 1.5 times the interquartile range above the upper quartile or below the lower quartile. bp <- boxplot(of_ct_fem_pre$Centre_time ~ of_ct_fem_pre$group) bp$out # 110 sel <- which(of_ct_fem_pre$Centre_time %in% bp$out) of_ct_fem_pre$Centre_time[sel] # 110 of_ct_fem_pre[sel, ] # ID Treatment Centre_time group # 39 L1m3 Low 110 Low # exclude outliers of_ct_fem_pre__ex_out <- of_ct_fem_pre[-sel, ] str(of_ct_fem_pre__ex_out) # 'data.frame': 26 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 24.8 6.8 87.9 73.5 2.8 ... # $ group : chr "Control" "Control" "Control" "Control" ... table(of_ct_fem_pre__ex_out$Treatment) # Control Low High # 9 8 9 # Kruskal-Wallis test kt <- kruskal.test( Centre_time ~ factor(group), of_ct_fem_pre__ex_out) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: Centre_time by factor(group) # Kruskal-Wallis chi-squared = 0.52137, df = 2, p-value = 0.7705 # # # # # # # # # ## isolate males and subset data names(of.pre) sel <- which(of.pre$Sex == "male") of_ct_male_pre <- of.pre[sel, c("ID", "Treatment", "Centre_time")] of_ct_male_pre$group <- as.character(of_ct_male_pre$Treatment) str(of_ct_male_pre) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 7 8 9 16 17 18 19 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ Centre_time: num 68.9 96.3 0 139.1 96.2 ... # $ group : chr "Control" "Control" "Control" "Control" ... ## Exclude boxplot outliers from Kruskal Wallis test bp <- boxplot(of_ct_male_pre$Centre_time ~ of_ct_male_pre$group) bp$out # 0.0 139.1 183.4 181.0 sel <- which(of_ct_male_pre$Centre_time %in% bp$out) of_ct_male_pre$Centre_time[sel] # 0.0 139.1 183.4 181.0 of_ct_male_pre[sel, ] # ID Treatment Centre_time group # 3 C1m3 Control 0.0 Control # 7 C3m1 Control 139.1 Control # 9 C3m3 Control 183.4 Control # 40 L2m1 Low 181.0 Low # exclude outliers of_ct_male_pre__ex_out <- of_ct_male_pre[-sel, ] str(of_ct_male_pre__ex_out) # 'data.frame': 23 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 8 16 17 18 19 20 21 22 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 3 3 3 3 ... # $ Centre_time: num 68.9 96.3 96.2 70 92.9 45.8 34.7 4 71.1 3 ... # $ group : chr "Control" "Control" "Control" "Control" ... table(of_ct_male_pre__ex_out$Treatment) # Control Low High # 6 8 9 # Kruskal-Wallis test kt <- kruskal.test( Centre_time ~ factor(group), of_ct_male_pre__ex_out) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: Centre_time by factor(group) # Kruskal-Wallis chi-squared = 2.4834, df = 2, p-value = 0.2889 ## i.e. no significant difference # # # # # # # # # # # # # # # # # # ## Plot Open Field - BASELINE - Time in Centre - Female & Male ## plot using facet_grid # don't colour outliers bp <- boxplot(of.pre$Centre_time ~ of.pre$Treatment + of.pre$Sex) bp$out # 110.0 0.0 139.1 183.4 181.0 sel <- which(of.pre$Centre_time %in% bp$out) of.pre$Centre_time[sel] # 0.0 139.1 183.4 110.0 181.0 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue of.pre.COPY <- of.pre of.pre.COPY$Sex <- factor(of.pre.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) set.seed(4321) p <- ggplot(data=of.pre.COPY, aes(x = Treatment, y = Centre_time)) + geom_boxplot(outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = of.pre.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Time in centre (s)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p dev.print(tiff, file = paste0("plots/","Open-Field-BASELINE-Time-in-Centre-Results-Female-and-Male-vFINAL.tiff"), width = 8.7, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # # # # # # # # # # ### Activity levels ## Distance travelled ## plot using facet_grid # don't colour outliers bp <- boxplot(of.post$Dist_Corrected ~ of.post$Treatment + of.post$Sex) bp$out # 6.850278 sel <- which(of.post$Dist_Corrected %in% bp$out) of.post$Dist_Corrected[sel] # 6.850278 # Kruskal-Wallis test kt <- kruskal.test( Dist_Corrected ~ factor(Treatment), of.post[-sel, ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Dist_Corrected by factor(Treatment) # Kruskal-Wallis chi-squared = 0.70209, df = 2, p-value = 0.704 # Males & exclude outlier kt <- kruskal.test( Dist_Corrected ~ factor(Treatment), of.post[-which(of.post$Dist_Corrected %in% bp$out | of.post$Sex == "female"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Dist_Corrected by factor(Treatment) # Kruskal-Wallis chi-squared = 4.0465, df = 2, p-value = 0.1322 # Females & exclude outlier kt <- kruskal.test( Dist_Corrected ~ factor(Treatment), of.post[-which(of.post$Dist_Corrected %in% bp$out | of.post$Sex == "male"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Dist_Corrected by factor(Treatment) # Kruskal-Wallis chi-squared = 1.4215, df = 2, p-value = 0.4913 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue of.post.COPY <- of.post of.post.COPY$Sex <- factor(of.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) bp <- boxplot(of.post$Dist_Corrected ~ of.post$Treatment + of.post$Sex) sel <- which(of.post$Dist_Corrected %in% bp$out) set.seed(4321) p <- ggplot(data=of.post.COPY, aes(x = Treatment, y = Dist_Corrected)) + geom_boxplot( outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = of.post.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Distance travelled (m)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p grid.text(label = "A", x = unit(0.02, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Open-Field-Distance-Travelled-Female-and-Male-vFINAL.tiff"), width = 9, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # ## No of entries ? ## plot using facet_grid # don't colour outliers bp <- boxplot(of.post$Centre_entries ~ of.post$Treatment + of.post$Sex) bp$out # 60 54 79 38 p <- ggplot(data=of.post, aes(x = Treatment, y = Centre_entries)) + geom_boxplot() + facet_grid(~Sex) p sel1 <- which(of.post$Sex == "female" & of.post$Treatment %in% c("Control","High") & of.post$Centre_entries %in% bp$out) of.post$Centre_entries[sel1] # 60 54 sel2 <- which(of.post$Sex == "male" & of.post$Treatment %in% c("Control","Low") & of.post$Centre_entries %in% bp$out) of.post$Centre_entries[sel2] # 79 38 # Kruskal-Wallis test kt <- kruskal.test( Centre_entries ~ factor(Treatment), of.post[-c(sel1,sel2), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 1.2144, df = 2, p-value = 0.5449 # Females & exclude outlier sel <- which(of.post$Sex == "female") subsel.out <- which(of.post$Treatment[sel] %in% c("Control","High") & of.post$Centre_entries[sel] %in% bp$out) of.post[sel[-subsel.out], ] kt <- kruskal.test( Centre_entries ~ factor(Treatment), of.post[sel[-subsel.out], ] ) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 1.9772, df = 2, p-value = 0.3721 # Males & exclude outlier sel <- which(of.post$Sex == "male") subsel.out <- which(of.post$Treatment[sel] %in% c("Control","Low") & of.post$Centre_entries[sel] %in% bp$out) of.post[sel[-subsel.out], ] kt <- kruskal.test( Centre_entries ~ factor(Treatment), of.post[sel[-subsel.out], ] ) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 5.0461, df = 2, p-value = 0.08021 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue of.post.COPY <- of.post of.post.COPY$Sex <- factor(of.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) bp <- boxplot(of.post$Centre_entries ~ of.post$Treatment + of.post$Sex) sel1 <- which(of.post$Sex == "female" & of.post$Treatment %in% c("Control","High") & of.post$Centre_entries %in% bp$out) sel2 <- which(of.post$Sex == "male" & of.post$Treatment %in% c("Control","Low") & of.post$Centre_entries %in% bp$out) set.seed(4321) p <- ggplot(data=of.post.COPY, aes(x = Treatment, y = Centre_entries)) + geom_boxplot( outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = of.post.COPY[-c(sel1,sel2), ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Centre entries (count)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p grid.text(label = "B", x = unit(0.02, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Open-Field-Centre-Entries-Female-and-Male-vFINAL.tiff"), width = 9, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ### Elevated Plus ep.post <- read_excel(path= paste0(datadir,"/","Elev-plus-post-exposure.xlsx"), sheet=1, range="A1:AA55", col_names = TRUE) ep.post <- as.data.frame(ep.post) str(ep.post) ep.post$Treatment <- factor(ep.post$Treatment, levels = c("Control", "Low" , "High"), ordered = TRUE ) # change char to factor ep.post[ , c( "Code", "Mouse", "ID", "Sex", "Litter" )] <- lapply(ep.post[ , c("Code", "Mouse", "ID", "Sex", "Litter" )], FUN = factor) str(ep.post) # 'data.frame': 54 obs. of 27 variables: # $ Test_no : num 1 2 3 4 5 6 7 8 9 10 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 2 2 2 3 3 3 3 ... # $ Code : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 13 13 13 7 7 7 8 ... # $ Mouse : Factor w/ 3 levels "m1","m2","m3": 1 2 3 1 2 3 1 2 3 1 ... # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 37 38 39 19 20 21 22 ... # $ Sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 2 2 2 2 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 3 11 2 3 11 11 12 13 12 ... # $ Stage : chr "First stage" "First stage" "First stage" "First stage" ... # $ Trial : num 1 1 1 1 1 1 1 1 1 1 ... # $ Apparatus : chr "plus maze 1" "plus maze 2" "plus maze 3" "plus maze 1" ... # $ Duration : num 600 600 600 600 600 600 600 600 600 600 ... # $ Distance : num 12.71 10.05 11.36 9.19 13.67 ... # $ Centre_entries : num 39 71 119 30 67 63 60 83 70 78 ... # $ Centre_time : num 29.6 240.9 142 45.9 93.4 ... # $ Centre_distance : num 1.275 2.455 3.541 0.921 2.077 ... # $ Centre_head_time : num 101.5 71.5 149.7 51.1 92.8 ... # $ Centre_head_entries : num 97 65 104 94 68 61 99 76 61 61 ... # $ Open_arm_entries : num 15 24 75 12 17 30 29 6 32 54 ... # $ Open_arm_time : num 19.9 62.8 145 39.3 17.8 ... # $ Open_arm_distance : num 0.138 0.27 1.94 0.263 0.32 0.806 0.727 0.107 0.794 0.725 ... # $ Open_arm_head_time : num 77.6 161.1 221.9 126.5 54.3 ... # $ Open_arm_head_entries : num 66 57 58 74 28 45 87 72 48 62 ... # $ Closed_arm_entries : num 37 47 82 25 54 49 49 77 56 28 ... # $ Closed_arm_time : num 550 296 313 515 489 ... # $ Closed_arm_distance : num 11.47 7.45 6.36 8.12 11.53 ... # $ Closed_arm_head_time : num 421 368 228 422 453 ... # $ Closed_arm_head_entries: num 80 56 77 66 58 60 76 43 50 81 ... ep.post$non_Open_arm_time <- ep.post$Closed_arm_time + ep.post$Centre_time ep.post$time_centre_and_open_arm <- ep.post$Centre_time + ep.post$Open_arm_time p <- ggplot(data = ep.post, aes(x = Treatment, y = Open_arm_time)) + geom_boxplot() + facet_grid(. ~ Sex) p p <- ggplot(data = ep.post, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot() + facet_grid(. ~ Sex) p # # # # # # ## # # # ## isolate sexes and exclude outlier data names(ep.post) # [1] "Test_no" "Treatment" "Code" "Mouse" # [5] "ID" "Sex" "Litter" "Stage" # [9] "Trial" "Apparatus" "Duration" "Distance" # [13] "Centre_entries" "Centre_time" "Centre_distance" "Centre_head_time" # [17] "Centre_head_entries" "Open_arm_entries" "Open_arm_time" "Open_arm_distance" # [21] "Open_arm_head_time" "Open_arm_head_entries" "Closed_arm_entries" "Closed_arm_time" # [25] "Closed_arm_distance" "Closed_arm_head_time" "Closed_arm_head_entries" "non_Open_arm_time" # [29] "time_centre_and_open_arm" sel <- which(ep.post$Sex == "female") ep_ct_fem <- ep.post[sel, c("ID", "Treatment", "time_centre_and_open_arm")] ep_ct_fem$group <- as.character(ep_ct_fem$Treatment) ep_ct_fem$ID <- as.character(ep_ct_fem$ID) ep_ct_fem$ID <- factor(ep_ct_fem$ID) str(ep_ct_fem) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 27 levels "C2m1","C2m2",..: 19 20 21 1 2 3 22 23 24 10 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 2 2 2 1 1 1 2 2 2 3 ... # $ time_centre_and_open_arm: num 85.2 111.2 120.6 50.5 589.5 ... # $ group : chr "Low" "Low" "Low" "Control" ... p <- ggplot(data = ep_ct_fem, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot() + theme_bw() p ## Exclude outliers for significance test - using default outlier classification ## outlier is defined as values outside 1.5 times the interquartile range above the upper quartile or below the lower quartile. bp <- boxplot(ep_ct_fem$time_centre_and_open_arm ~ ep_ct_fem$group) bp$out # 589.5 41.8 328.3 sel <- which(ep_ct_fem$time_centre_and_open_arm %in% bp$out) ep_ct_fem$time_centre_and_open_arm[sel] # 589.5 41.8 328.3 ep_ct_fem[sel, ] # ID Treatment time_centre_and_open_arm group # 14 C2m2 Control 589.5 Control # 19 L3m1 Low 41.8 Low # 20 L3m2 Low 328.3 Low # exclude outliers ep_ct_fem__ex_out <- ep_ct_fem[-sel, ] str(ep_ct_fem__ex_out) # 'data.frame': 24 obs. of 4 variables: # $ ID : Factor w/ 27 levels "C2m1","C2m2",..: 19 20 21 1 3 24 10 11 12 13 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 2 2 2 1 1 2 3 3 3 3 ... # $ time_centre_and_open_arm: num 85.2 111.2 120.6 50.5 91.5 ... # $ group : chr "Low" "Low" "Low" "Control" ... # Kruskal-Wallis test kt <- kruskal.test( time_centre_and_open_arm ~ factor(group), ep_ct_fem__ex_out) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: time_centre_and_open_arm by factor(group) # Kruskal-Wallis chi-squared = 1.9346, df = 2, p-value = 0.3801 ## i.e. No significant difference! # # # # # # # # # ## Note there are no outliers in males for Elevated plus maze for variable: "time_centre_and_open_arm" names(ep.post) sel <- which(ep.post$Sex == "male") ep_ct_male <- ep.post[sel, c("ID", "Treatment", "time_centre_and_open_arm")] ep_ct_male$group <- as.character(ep_ct_male$Treatment) ep_ct_male$ID <- as.character(ep_ct_male$ID) ep_ct_male$ID <- factor(ep_ct_male$ID) str(ep_ct_male) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 27 levels "C1m1","C1m2",..: 1 2 3 10 11 12 13 14 15 19 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 3 3 3 3 3 3 2 ... # $ time_centre_and_open_arm: num 49.5 303.7 287 152 449.7 ... # $ group : chr "Control" "Control" "Control" "High" ... ## Exclude boxplot outliers from Kruskal Wallis test bp <- boxplot(ep_ct_male$time_centre_and_open_arm ~ ep_ct_male$group) bp$out # numeric(0) # i.e. no outliers to exclude # Kruskal-Wallis test kt <- kruskal.test( time_centre_and_open_arm ~ factor(group), ep_ct_male) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: time_centre_and_open_arm by factor(group) # Kruskal-Wallis chi-squared = 0.46914, df = 2, p-value = 0.7909 ## i.e. No significant differences # # # # # # # # # ## plot Elev Plus data with both sexes ## plot using facet_grid # don't colour outliers bp <- boxplot(ep.post$time_centre_and_open_arm ~ ep.post$Treatment + ep.post$Sex) bp$out # 589.5 41.8 328.3 sel <- which(ep.post$time_centre_and_open_arm %in% bp$out) ep.post$time_centre_and_open_arm[sel] # 589.5 41.8 328.3 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue ep.post.COPY <- ep.post ep.post.COPY$Sex <- factor(ep.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) set.seed(4321) p <- ggplot(data=ep.post.COPY, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot(outlier.size = 1) + facet_grid(~Sex ) + geom_jitter(data = ep.post.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Time in centre or open arms (s)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p ggsave(plot=p, filename = paste0("plots/","Elev-Plus-Time-in-Centre-or-Open-Arm-Results-Female-and-Male-vFINAL.tiff"), width = 9, height = 6, units = "cm", dpi = 600, compression = "lzw") ### Compare pre- to post- Elev Plus results ep.pre <- read_excel(path= paste0(datadir,"/","Elev-plus-pre-exposure.xlsx"), sheet=1, range="A1:AA55", col_names = TRUE) ep.pre <- as.data.frame(ep.pre) str(ep.pre) ep.pre$Treatment <- factor(ep.pre$Treatment, levels = c("Control", "Low" , "High"), ordered = TRUE ) # change char to factor ep.pre[ , c( "Code", "Mouse", "ID", "Sex", "Litter" )] <- lapply(ep.pre[ , c("Code", "Mouse", "ID", "Sex", "Litter" )], FUN = factor) names(mice_info) # [1] "mouseID" "Animal no" "Sex" # [4] "Litter" "Date of Birth" "Cage no" # [7] "Cage Name" "Treatment" "Rack" # [10] "Ear notch ID" "Ear notch pos" "Age_received_20_Aug" # [13] "Age_first_soil_exposure_27_Aug" "Rack_position" ep.pre <- merge(x = ep.pre, y = mice_info[ ,c( "mouseID", "Date of Birth", "Cage no", "Cage Name", "Rack", "Age_received_20_Aug", "Age_first_soil_exposure_27_Aug", "Rack_position")], by.x = "ID", by.y = "mouseID", all.x = TRUE ) str(ep.pre) # 'data.frame': 54 obs. of 34 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 4 5 6 7 8 9 10 ... # $ Test_no : num 1 2 3 13 14 15 25 26 27 34 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Code : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Mouse : Factor w/ 3 levels "m1","m2","m3": 1 2 3 1 2 3 1 2 3 1 ... # $ Sex : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 2 2 2 1 ... # $ Litter : Factor w/ 15 levels "Blue 20","Blue 24",..: 2 3 11 11 12 14 9 10 1 15 ... # $ Stage : chr "First stage" "First stage" "First stage" "First stage" ... # $ Trial : num 1 1 1 1 1 1 1 1 1 1 ... # $ Apparatus : chr "plus maze 1" "plus maze 2" "plus maze 3" "plus maze 1" ... # $ Duration : num 600 600 600 600 600 600 600 600 600 600 ... # $ Distance : num 13.45 3.24 9.82 13.59 13.75 ... # $ Centre_entries : num 48 34 77 69 83 65 61 79 90 30 ... # $ Centre_time : num 43.8 88.9 205.9 151.5 142.8 ... # $ Centre_distance : num 1.723 0.585 2.187 1.971 2.609 ... # $ Centre_head_time : num 81.3 67.2 169.2 52.5 97.5 ... # $ Centre_head_entries : num 85 62 73 57 62 13 94 75 63 46 ... # $ Open_arm_entries : num 20 23 35 41 46 40 27 31 58 16 ... # $ Open_arm_time : num 56.7 298 85.5 209.2 187.9 ... # $ Open_arm_distance : num 0.429 0.628 1.149 3.73 3.184 ... # $ Open_arm_head_time : num 82.5 271.2 119.6 250.4 218.3 ... # $ Open_arm_head_entries : num 50 45 49 44 46 10 80 50 54 35 ... # $ Closed_arm_entries : num 33 12 42 36 38 40 46 49 43 19 ... # $ Closed_arm_time : num 500 213 309 239 269 ... # $ Closed_arm_distance : num 11.45 2.13 6.71 8.12 8.43 ... # $ Closed_arm_head_time : num 436 262 311 297 284 ... # $ Closed_arm_head_entries : num 53 25 54 52 52 15 76 65 57 28 ... # $ Date of Birth : POSIXct, format: "2018-08-02" "2018-08-04" "2018-08-02" "2018-08-02" ... # $ Cage no : num 1 1 1 5 5 5 9 9 9 12 ... # $ Cage Name : Factor w/ 18 levels "C1","C2","C3",..: 1 1 1 2 2 2 3 3 3 4 ... # $ Rack : num 1 1 1 2 2 2 3 3 3 4 ... # $ Age_received_20_Aug : num 2.6 2.3 2.6 2.6 2.6 3 4.1 4.1 4.1 3.7 ... # $ Age_first_soil_exposure_27_Aug: num 3.6 3.3 3.6 3.6 3.6 4 5.1 5.1 5.1 4.7 ... # $ Rack_position : Factor w/ 3 levels "low","middle",..: 3 3 3 2 2 2 1 1 1 1 ... ep.pre$time_centre_and_open_arm <- ep.pre$Centre_time + ep.pre$Open_arm_time p <- ggplot(data = ep.pre, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot() + facet_grid( ~ Sex) + theme_bw() p ## isolate data for females and males respectively - to exclude outliers and test for difference names(ep.pre) # [1] "ID" "Test_no" "Treatment" # [4] "Code" "Mouse" "Sex" # [7] "Litter" "Stage" "Trial" # [10] "Apparatus" "Duration" "Distance" # [13] "Centre_entries" "Centre_time" "Centre_distance" # [16] "Centre_head_time" "Centre_head_entries" "Open_arm_entries" # [19] "Open_arm_time" "Open_arm_distance" "Open_arm_head_time" # [22] "Open_arm_head_entries" "Closed_arm_entries" "Closed_arm_time" # [25] "Closed_arm_distance" "Closed_arm_head_time" "Closed_arm_head_entries" # [28] "Date of Birth" "Cage no" "Cage Name" # [31] "Rack" "Age_received_20_Aug" "Age_first_soil_exposure_27_Aug" # [34] "Rack_position" sel <- which(ep.pre$Sex == "female") #ep_ct_fem_pre <- ep.pre[sel, c("ID", "Treatment", "Centre_time")] ep_ct_fem_pre <- ep.pre[sel, c("ID", "Treatment", "time_centre_and_open_arm")] ## Kruskal-Wallis multiple comparison test str(ep_ct_fem_pre) # 'data.frame': 27 obs. of 3 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ time_centre_and_open_arm: num 360.7 330.7 271.3 84.8 97.2 ... ep_ct_fem_pre$group <- as.character(ep_ct_fem_pre$Treatment) str(ep_ct_fem_pre) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 4 5 6 10 11 12 13 14 15 25 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ time_centre_and_open_arm: num 360.7 330.7 271.3 84.8 97.2 ... # $ group : chr "Control" "Control" "Control" "Control" ... p <- ggplot(data = ep_ct_fem_pre, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot() + theme_bw() p ## Exclude outliers from significance testing - using default outlier classification ## outlier is defined as values outside 1.5 times the interquartile range above the upper quartile or below the lower quartile. bp <- boxplot(ep_ct_fem_pre$time_centre_and_open_arm ~ ep_ct_fem_pre$group) bp$out # none # sel <- which(ep_ct_fem_pre$time_centre_and_open_arm %in% bp$out) # ep_ct_fem_pre$time_centre_and_open_arm[sel] # # ep_ct_fem_pre[sel, ] # # exclude outliers # ep_ct_fem_pre__ex_out <- ep_ct_fem_pre[-sel, ] # str(ep_ct_fem_pre__ex_out) # table(ep_ct_fem_pre__ex_out$Treatment) table(ep_ct_fem_pre$Treatment) # Control Low High # 9 9 9 # Kruskal-Wallis test #kt <- kruskal.test( time_centre_and_open_arm ~ factor(group), ep_ct_fem_pre__ex_out) # Kruskal Wallis test kt <- kruskal.test( time_centre_and_open_arm ~ factor(group), ep_ct_fem_pre) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: time_centre_and_open_arm by factor(group) # Kruskal-Wallis chi-squared = 3.3474, df = 2, p-value = 0.1875 # # # # # # # # # ## isolate males and subset data names(ep.pre) sel <- which(ep.pre$Sex == "male") ep_ct_male_pre <- ep.pre[sel, c("ID", "Treatment", "time_centre_and_open_arm")] ep_ct_male_pre$group <- as.character(ep_ct_male_pre$Treatment) str(ep_ct_male_pre) # 'data.frame': 27 obs. of 4 variables: # $ ID : Factor w/ 54 levels "C1m1","C1m2",..: 1 2 3 7 8 9 16 17 18 19 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 3 ... # $ time_centre_and_open_arm: num 100 387 291 158 246 ... # $ group : chr "Control" "Control" "Control" "Control" ... ## Exclude boxplot outliers from Kruskal Wallis test bp <- boxplot(ep_ct_male_pre$time_centre_and_open_arm ~ ep_ct_male_pre$group) bp$out # none # sel <- which(ep_ct_male_pre$time_centre_and_open_arm %in% bp$out) # ep_ct_male_pre$time_centre_and_open_arm[sel] # # ep_ct_male_pre[sel, ] # # # # exclude outliers # ep_ct_male_pre__ex_out <- ep_ct_male_pre[-sel, ] #str(ep_ct_male_pre__ex_out) #table(ep_ct_male_pre__ex_out$Treatment) table(ep_ct_male_pre$Treatment) # Control Low High # 9 9 9 # Kruskal-Wallis test kt <- kruskal.test( time_centre_and_open_arm ~ factor(group), ep_ct_male_pre) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # # data: time_centre_and_open_arm by factor(group) # Kruskal-Wallis chi-squared = 4.7019, df = 2, p-value = 0.09528 ## i.e. no significant difference # # # # # # # # # # # # # # # # # # ## Plot Elev Plus - BASELINE - Time in Centre & open Arms - Female & Male ## plot using facet_grid # don't colour outliers bp <- boxplot(ep.pre$time_centre_and_open_arm ~ ep.pre$Treatment + ep.pre$Sex) bp$out # none # sel <- which(ep.pre$time_centre_and_open_arm %in% bp$out) # ep.pre$time_centre_and_open_arm[sel] # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue ep.pre.COPY <- ep.pre ep.pre.COPY$Sex <- factor(ep.pre.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) set.seed(4321) p <- ggplot(data=ep.pre.COPY, aes(x = Treatment, y = time_centre_and_open_arm)) + geom_boxplot(outlier.size=1) + facet_grid(~Sex ) + #geom_jitter(data = ep.pre.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + geom_jitter(data = ep.pre.COPY, aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Time in centre or open arms (s)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p dev.print(tiff, file = paste0("plots/","Elev-Plus-BASELINE-Time-in-Centre-and-Open-Arms-Results-Female-and-Male-vFINAL.tiff"), width = 8.7, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # ## Distance travelled ## plot using facet_grid # don't colour outliers bp <- boxplot(ep.post$Distance ~ ep.post$Treatment + ep.post$Sex) bp$out # 5.789 2.624 16.023 sel <- which(ep.post$Distance %in% bp$out) ep.post$Distance[sel] # 2.624 5.789 16.023 # Kruskal-Wallis test kt <- kruskal.test( Distance ~ factor(Treatment), ep.post[-sel, ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Distance by factor(Treatment) # Kruskal-Wallis chi-squared = 0.33697, df = 2, p-value = 0.8449 # Males & exclude outlier kt <- kruskal.test( Distance ~ factor(Treatment), ep.post[-which(ep.post$Distance %in% bp$out | ep.post$Sex == "female"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Distance by factor(Treatment) # Kruskal-Wallis chi-squared = 2.4744, df = 2, p-value = 0.2902 # Females & exclude outlier kt <- kruskal.test( Distance ~ factor(Treatment), ep.post[-which(ep.post$Distance %in% bp$out | ep.post$Sex == "male"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Distance by factor(Treatment) # Kruskal-Wallis chi-squared = 5.0123, df = 2, p-value = 0.08158 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue ep.post.COPY <- ep.post ep.post.COPY$Sex <- factor(ep.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) bp <- boxplot(ep.post$Distance ~ ep.post$Treatment + ep.post$Sex) sel <- which(ep.post$Distance %in% bp$out) set.seed(4321) p <- ggplot(data=ep.post.COPY, aes(x = Treatment, y = Distance)) + geom_boxplot( outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = ep.post.COPY[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Distance travelled (m)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p grid.text(label = "A", x = unit(0.02, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Elev-Plus-Distance-Travelled-Female-and-Male-vFINAL.tiff"), width = 9, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # ## No of entries ? ## Use entries into centre or open arms ep.post.COPY <- ep.post ep.post.COPY$Sex <- factor(ep.post.COPY$Sex, levels = c("female", "male"), labels=c("Female", "Male")) ep.post.COPY$Centre_and_open_arm_entries <- ep.post.COPY$Centre_entries + ep.post.COPY$Open_arm_entries ## plot using facet_grid # don't colour outliers bp <- boxplot(ep.post.COPY$Centre_and_open_arm_entries ~ ep.post.COPY$Treatment + ep.post.COPY$Sex) bp$out # 144 177 132 50 p <- ggplot(data=ep.post.COPY, aes(x = Treatment, y = Centre_and_open_arm_entries)) + geom_boxplot() + facet_grid(~Sex) p sel1 <- which(ep.post.COPY$Sex == "Female" & ep.post.COPY$Treatment == "Control" & ep.post.COPY$Centre_and_open_arm_entries %in% bp$out) ep.post.COPY$Centre_and_open_arm_entries[sel1] # 144 sel2 <- which(ep.post.COPY$Sex == "Male" & ep.post.COPY$Treatment %in% c("Low","High") & ep.post.COPY$Centre_and_open_arm_entries %in% bp$out) ep.post.COPY$Centre_and_open_arm_entries[sel2] # 132 50 177 # Kruskal-Wallis test kt <- kruskal.test( Centre_and_open_arm_entries ~ factor(Treatment), ep.post.COPY[-c(sel1,sel2), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_and_open_arm_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 2.027, df = 2, p-value = 0.3629 # Females & exclude outlier sel <- which(ep.post.COPY$Sex == "Female") subsel.out <- which(ep.post.COPY$Treatment[sel] == "Control" & ep.post.COPY$Centre_and_open_arm_entries[sel] %in% bp$out) ep.post.COPY[sel[-subsel.out], ] kt <- kruskal.test( Centre_and_open_arm_entries ~ factor(Treatment), ep.post.COPY[sel[-subsel.out], ] ) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_and_open_arm_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 3.4015, df = 2, p-value = 0.1825 # Males & exclude outlier sel <- which(ep.post.COPY$Sex == "Male") subsel.out <- which(ep.post.COPY$Treatment[sel] %in% c("Low","High") & ep.post.COPY$Centre_and_open_arm_entries[sel] %in% bp$out) ep.post.COPY[sel[-subsel.out], ] kt <- kruskal.test( Centre_and_open_arm_entries ~ factor(Treatment), ep.post.COPY[sel[-subsel.out], ] ) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: Centre_and_open_arm_entries by factor(Treatment) # Kruskal-Wallis chi-squared = 0.21402, df = 2, p-value = 0.8985 cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue bp <- boxplot(ep.post.COPY$Centre_and_open_arm_entries ~ ep.post.COPY$Treatment + ep.post.COPY$Sex) sel1 <- which(ep.post.COPY$Sex == "Female" & ep.post.COPY$Treatment == "Control" & ep.post.COPY$Centre_and_open_arm_entries %in% bp$out) sel2 <- which(ep.post.COPY$Sex == "Male" & ep.post.COPY$Treatment %in% c("Low","High") & ep.post.COPY$Centre_and_open_arm_entries %in% bp$out) set.seed(4321) p <- ggplot(data=ep.post.COPY, aes(x = Treatment, y = Centre_and_open_arm_entries)) + geom_boxplot( outlier.size=1) + facet_grid(~Sex ) + geom_jitter(data = ep.post.COPY[-c(sel1,sel2), ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Centre + open arm entries (count)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p grid.text(label = "B", x = unit(0.02, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Elev-Plus-CENTRE-and-OPEN-Arm-Entries-Female-and-Male-vFINAL.tiff"), width = 9, height = 6, units = "cm", res=600, compression="lzw") #------------------------ #### Dust quantification #------------------------ dust <- read_excel(path= paste0(datadir,"/","Air-dust-weights-within-enclosures.xlsx"), sheet=1, range="A3:D23", col_names = TRUE) dust <- as.data.frame(dust) str(dust) dust$Treatment <- substring(dust$Enclosure,first = 1,last = 1) levels(factor(dust$Treatment)) #[1] "C" "E" "H" "L" unique( dust$Enclosure ) # [1] "C1" "L1" "H1" "H2" "C2" "L2" "L3" "H3" "C3" "H4" "L4" "C4" "C5" # [14] "H5" "L5" "L6" "C6" "H6" "Empty1" "Empty2" # ignore Empty measurements - these were checking +/- 0.05 mg precision of the scales dim(dust) # 20 5 dust <- dust[-c(19,20), ] dust # Enclosure no_dust_mg with_dust_mg actual_dust_mg Treatment # 1 C1 3311.61 3314.27 2.66 C # 2 L1 3203.75 3212.71 8.96 L # 3 H1 3181.73 3189.72 7.99 H # 4 H2 3340.97 3354.52 13.55 H # 5 C2 3380.20 3381.58 1.38 C # 6 L2 3198.74 3210.03 11.29 L # 7 L3 3279.20 3294.09 14.89 L # 8 H3 3231.12 3248.87 17.75 H # 9 C3 3473.82 3476.98 3.16 C # 10 H4 3386.04 3400.12 14.08 H # 11 L4 3345.59 3363.88 18.29 L # 12 C4 3502.18 3505.10 2.92 C # 13 C5 3338.23 3341.11 2.88 C # 14 H5 3448.29 3460.54 12.25 H # 15 L5 3368.33 3380.84 12.51 L # 16 L6 3319.03 3332.96 13.93 L # 17 C6 3458.19 3460.68 2.49 C # 18 H6 3374.56 3382.22 7.66 H dust$Treatment <- factor(dust$Treatment, levels = c("C","L","H"), labels = c("Control", "Low", "High"), ordered = TRUE) ## convert measured dust into dose per animal per week # dust collected over 3 x 20 cm lengths of 2.5 cm wide tubing = area of 150 cm2. # experiment had 3 mice per cage over 7 weeks. # large mouse cage dimensions are 41 cm long x 25 cm wide = 1025 cm2 # convert mg to g dust$actual_dust_permouse_perweek_g <- (1/150)*(1025)*(1/7)*(1/3)*(1/1000)*dust$actual_dust_mg str(dust) # 'data.frame': 18 obs. of 6 variables: # $ Enclosure : chr "C1" "L1" "H1" "H2" ... # $ no_dust_mg : num 3312 3204 3182 3341 3380 ... # $ with_dust_mg : num 3314 3213 3190 3355 3382 ... # $ actual_dust_mg : num 2.66 8.96 7.99 13.55 1.38 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 2 3 3 1 2 2 3 1 3 ... # $ actual_dust_permouse_perweek_g: num 0.000866 0.002916 0.0026 0.004409 0.000449 ... boxplot(dust$actual_dust_permouse_perweek_g ~ Treatment, data=dust) p <- ggplot(data=dust, aes(x = Treatment, y = actual_dust_permouse_perweek_g)) + geom_boxplot() pp <- p + theme_bw() + labs(y="Dust exposure (g.mouse-1.week-1)", x=NULL) pp ## minimise plot # don't colour outliers bp <- boxplot(dust$actual_dust_permouse_perweek_g ~ dust$Treatment) bp$out # 0.0004490476 sel <- which(dust$actual_dust_permouse_perweek_g %in% bp$out) dust$actual_dust_permouse_perweek_g[sel] # 0.0004490476 # Kruskal-Wallis test kt <- kruskal.test( actual_dust_permouse_perweek_g ~ factor(as.character(Treatment)), dust[-sel, ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: actual_dust_permouse_perweek_g by factor(Treatment) # Kruskal-Wallis chi-squared = 10.209, df = 2, p-value = 0.006069 table( dust[-sel, ]$Treatment ) # Control Low High # 5 6 6 ## Dunn Test uses factor vector or non-numeric vector that can be coerced to a factor vector pt <- dunnTest( actual_dust_permouse_perweek_g ~ factor(as.character(Treatment)), data= dust[-sel, ], method = "bonferroni" ) # Warning message: # group was coerced to a factor. pt # Dunn (1964) Kruskal-Wallis multiple comparison # p-values adjusted with the Bonferroni method. # # Comparison Z P.unadj P.adj # 1 Control - High -2.5617739 0.010413908 0.031241724 # 2 Control - Low -2.9978206 0.002719177 0.008157532 # 3 High - Low -0.4573296 0.647434186 1.000000000 pt$dtres pt <- pt$res pt cldList(comparison = pt$Comparison, p.value = pt$P.adj, threshold = 0.05) # Group Letter MonoLetter # 1 Control a a # 2 High b b # 3 Low b b cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue set.seed(321) p <- ggplot(data=dust, aes(x = Treatment, y = actual_dust_permouse_perweek_g)) + geom_boxplot(outlier.size=1) + geom_jitter(data = dust[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y= bquote('Dust exposure (g.'*mouse^-1~week^-1*')'), x = NULL ) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)) #, #strip.background = element_rect(fill="white", linetype = "blank") ) p # add significance letters grid.text(label = "a", x = unit(0.4, "npc") , y = unit(0.3,"npc"), gp=gpar(fontsize=10.5) ) grid.text(label = "b", x = unit(0.65, "npc") , y = unit(0.88,"npc"), gp=gpar(fontsize=10.5) ) grid.text(label = "b", x = unit(0.9, "npc") , y = unit(0.85,"npc"), gp=gpar(fontsize=10.5) ) dev.print(tiff, file = paste0("plots/","Dust-quantification-boxplots-with-Signif-letters-vFINAL.tiff"), width = 7, height = 6, units = "cm", res=600, compression="lzw") sel <- which(dust$Treatment == "Control") summary(dust$actual_dust_permouse_perweek_g[sel]) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.0004490 0.0008241 0.0009013 0.0008401 0.0009469 0.0010283 sel <- which(dust$Treatment == "Low") summary(dust$actual_dust_permouse_perweek_g[sel]) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.002916 0.003773 0.004302 0.004332 0.004767 0.005952 sel <- which(dust$Treatment == "High") summary(dust$actual_dust_permouse_perweek_g[sel]) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.002493 0.002946 0.004198 0.003974 0.004538 0.005776 ## Ottman et al 2018 have average soil exposures of ~ 32 g.mouse-1.week-1 32/0.004332 # Exposure ratio: Ottman et soil / Low soil dust = 7386 32/0.003974 # Exposure ratio: Ottman et soil / High soil dust = 8052.34 # Zhou et al 2015 also use similar quantities of exposure material directly into cage #------------------------ #### Import microbiome data #------------------------ raw.otu <- read_excel(path= paste0(datadir,"/","absoluteabundance.xlsx"), sheet=1, range="A1:LN10485", col_names = TRUE) raw.otu <- as.data.frame(raw.otu) str(raw.otu) # this is a combined OTU abundance table & taxonomy table dim(raw.otu) # 10484 326 names(raw.otu) # remove "-16S-V3-V4" from column names gsub(pattern = "-16S-V3-V4",x = names(raw.otu), replacement = "") names(raw.otu) <- gsub(pattern = "-16S-V3-V4",x = names(raw.otu), replacement = "") names(raw.otu) names(raw.otu)[1] # "#OTU ID" names(raw.otu)[1] <- "OTU_ID" names(raw.otu) ## split off taxonomy table ... then tidy up later raw.tax <- raw.otu[ ,c("OTU_ID", "Consensus Lineage")] ## tidy up OTU table row.names(raw.otu) <- raw.otu$OTU_ID names(raw.otu)[-c(1, length(names(raw.otu)))] temp <- raw.otu raw.otu <- raw.otu[ , -c(1, length(names(raw.otu)))] dim(raw.otu) # 10484 324 names(raw.otu) # [1] "L3m2Ce" "L3m3T01" "C5a1w1" "L1a2w1" "H5s1w1" "L2s1w7" "L1m3T01" "bat11blk" "L5m3T16" "C3a1w6" "H3m3Ce" "Rem29" # [13] "H1a2w1" "L5s1w1" "Clr26" "L3m3T16" "H3m2Ce" "L1m3T16" "ABLK2w6" "L3m3Ce" "H2s1w7" "L5m3T01" "L2s2w7" "bat8blk" # [25] "bat12blk" "C3a2w6" "L3m1Ce" "C5a2w1" "L1a1w1" "H5s2w1" "ABLK1w6" "H2s2w7" "H1a1w1" "L5s2w1" "H3m1Ce" "L6a1w6" # [37] "L3s2w7" "H3m1T01" "C2a2w6" "bfre3T13" "H1m1T01" "C4a2w1" "L2m1Ce" "C6m2Ce" "H4s2w1" "H5m1T16" "H3m1T16" "C6m3Ce" # [49] "H6a1w6" "H3s2w7" "L4s2w1" "H1m1T16" "H5m1T01" "H2m1Ce" "H1m2T01" "C1m3T16" "C4a1w1" "L2m2Ce" "C6m1Ce" "C5m3T01" # [61] "H4s1w1" "H5m2T16" "L6a2w6" "L3s1w7" "H3m2T01" "C3m3T16" "H2m3Ce" "C2a1w6" "C1m3T01" "L4s1w1" "H1m2T16" "bat14blk" # [73] "H5m2T01" "C5m3T16" "H2m2Ce" "C3m3T01" "H3m2T16" "L2m3Ce" "Rem23" "H6a2w6" "H3s1w7" "L2m2T16" "bat4blk" "L6m2T01" # [85] "Rem36" "L1s2w7" "L4a1w6" "L2a1w1" "H6s2w1" "C4m2Ce" "L4m2T01" "C6a2w1" "L2m2T01" "H1s2w7" "H4a1w6" "C4m3Ce" # [97] "L6m2T16" "Clr39" "H2a1w1" "L6s2w1" "L4m2T16" "L2a2w1" "H6s1w1" "C4m1Ce" "Rem13" "L4m1T01" "C6a1w1" "L2m1T16" # [109] "bat7blk" "L1s1w7" "L6m1T01" "L4a2w6" "H2a2w1" "L6s1w1" "L4m1T16" "H1s1w7" "L2m1T01" "H4a2w6" "L6m1T16" "C5m1Ce" # [121] "L3a2w1" "C2m1T01" "C6m1T16" "L1m2Ce" "Clr16" "H1m3Ce" "C1a1w6" "L5a2w6" "C4m1T16" "C2m1T16" "H1m2Ce" "Rem19" # [133] "H3a2w1" "C6m1T01" "H5a2w6" "C4m1T01" "L1m3Ce" "C1a2w6" "L5a1w6" "C4m2T16" "H4m3T01" "C5m2Ce" "H2m3T16" "C2m2T01" # [145] "L3a1w1" "C6m2T16" "Clr33" "H6m3T01" "L1m1Ce" "C5m3Ce" "H5a1w6" "H4m3T16" "C4m2T01" "C2m2T16" "H2m3T01" "H1m1Ce" # [157] "bat1blk" "H6m3T16" "C6m2T01" "H3a1w1" "C2a2w1" "L4m1Ce" "L3m2T01" "H2s2w1" "bfre2T5" "L6a1w1" "bat10blk" "L5m2T16" # [169] "L5s2w7" "Rem16" "C4a2w6" "L1m2T01" "L2s2w1" "H6a1w1" "Clr19" "L3m2T16" "H4m1Ce" "L5m2T01" "H5s2w7" "L1m2T16" # [181] "bfre3T5" "L5m1T16" "bat13blk" "bat9blk" "L5s1w7" "H4m3Ce" "C4a1w6" "L1m1T01" "C2a1w1" "L4m2Ce" "L3m1T01" "Rem33" # [193] "H2s1w1" "bfre1T5" "L6a2w1" "L4m3Ce" "L5m1T01" "L1m1T16" "H5s1w7" "L2s1w1" "H6a2w1" "L3m1T16" "H4m2Ce" "L4s1w7" # [205] "L1a2w6" "C5a1w6" "Clr36" "H5m3Ce" "C3m1T16" "L5m2Ce" "C5m1T01" "C3a1w1" "bfre2T13" "H3s1w1" "C1m1T16" "C1m1Ce" # [217] "L5m3Ce" "C3m1T01" "H4s1w7" "H1a2w6" "bat16blk" "L3s1w1" "C5m1T16" "Rem39" "H5m2Ce" "C1m1T01" "ABLK2w1" "Clr13" # [229] "L5m1Ce" "C5m2T01" "C3a2w1" "H5m3T16" "bfre1T13" "H1m3T01" "C1m2T16" "H3s2w1" "C1m2Ce" "L4s2w7" "L1a1w6" "C5a2w6" # [241] "H3m3T01" "C3m2T16" "H5m3T01" "bat15blk" "C5m2T16" "L3s2w1" "H5m1Ce" "C1m2T01" "ABLK1w1" "H1m3T16" "C3m2T01" "H3m3T16" # [253] "H4s2w7" "H1a1w6" "C1m3Ce" "bat5blk" "L6m3T01" "L3a2w6" "L2m3T16" "L6s1w7" "L4m3T01" "C3m1Ce" "H1s1w1" "L5a2w1" # [265] "C1a1w1" "H3a2w6" "L6m3T16" "H6s1w7" "L2m3T01" "L4m3T16" "L1s1w1" "H5a2w1" "C3m2Ce" "H1s2w1" "L5a1w1" "C1a2w1" # [277] "bat6blk" "L3a1w6" "L6s2w7" "L1s2w1" "H5a1w1" "C3m3Ce" "H3a1w6" "Clr23" "H6s2w7" "L4a1w1" "H6m1T01" "C2m2Ce" # [289] "L6m1Ce" "H2m1T16" "C6a2w6" "H4m1T01" "Clr29" "L2a1w6" "H6m1Ce" "H6m1T16" "Rem26" "H4a1w1" "H2m1T01" "H4m1T16" # [301] "H2a1w6" "C2m3Ce" "C6a1w6" "H6m3Ce" "C4m3T16" "H4m2T01" "L2a2w6" "C6m3T16" "L4a2w1" "H6m2T01" "C2m1Ce" "L6m2Ce" # [313] "H2m2T16" "C2m3T01" "H4m2T16" "C4m3T01" "H2a2w6" "L6m3Ce" "H6m2Ce" "H6m2T16" "C6m3T01" "C2m3T16" "H4a2w1" "H2m2T01" ## Create 'otuTable' # otu_table - Works on any numeric matrix. # You must also specify if the species are rows or columns otu.16s <- as.matrix(raw.otu) OTU.16s <- otu_table(otu.16s, taxa_are_rows = TRUE) dim(OTU.16s) # 10484 324 ## tidy up Taxonomy table str(raw.tax) # 'data.frame': 10484 obs. of 2 variables: # $ OTU_ID : chr "OTU_1" "OTU_2" "OTU_3" "OTU_4" ... # $ Consensus Lineage: chr "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__" "k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Lactobacillaceae; g__Lactobacillus; s__" "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__; s__" "k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__" ... # Separate the "Consensus Lineage" column, currently separated by semicolon # so that each taxonomic rank has its own column # e.g. "k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae; g__Rhodoplanes; s__" # note there is a space included in the separator "; " tax <- separate(raw.tax, `Consensus Lineage`, c("Kingdom","Phylum","Class","Order", "Family", "Genus","Species"), sep= "; ", remove=TRUE) # Warning message: # Expected 7 pieces. Missing pieces filled with `NA` in 1244 rows [36, 71, 149, 376, 529, 552, 553, 655, 803, 841, 868, 891, 999, 1009, 1048, 1062, 1077, 1169, 1214, 1275, ...]. # move OTU_ID to row.name then delete that column row.names(tax) <- tax$OTU_ID tax <- tax[, -1] # tidy up unknown taxa dim(tax) # 10484 7 taxa_prefix <- c("k__", "p__", "c__", "o__", "f__", "g__", "s__") for (i in 1:length(names(tax))) { sel <- which( is.na(tax[, i]) | tax[, i] == taxa_prefix[i] ) tax[ sel, names(tax)[i] ] <- paste0(taxa_prefix[i],"unknown") } ## Create 'taxonomyTable' # tax_table - Works on any character matrix. # The rownames must match the OTU names (taxa_names) of the otu_table if you plan to combine it with a phyloseq-object. tax <- as.matrix(tax) TAX.16s <- tax_table(tax) ## Create a phyloseq object, merging OTU & TAX tables phy0.16s = phyloseq(OTU.16s, TAX.16s) phy0.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10484 taxa and 324 samples ] # tax_table() Taxonomy Table: [ 10484 taxa by 7 taxonomic ranks ] ## now join to sample data table samp <- data.frame(sample = sample_names(phy0.16s), samp_type = NA, mouseID = NA, Treatment = NA, Time = NA) str(samp) samp$sample <- as.character(samp$sample) # fill mouseID based on sample names samp$sample # [1] "L3m2Ce" "L3m3T01" "C5a1w1" "L1a2w1" "H5s1w1" "L2s1w7" "L1m3T01" "bat11blk" "L5m3T16" "C3a1w6" "H3m3Ce" "Rem29" # [13] "H1a2w1" "L5s1w1" "Clr26" "L3m3T16" "H3m2Ce" "L1m3T16" "ABLK2w6" "L3m3Ce" "H2s1w7" "L5m3T01" "L2s2w7" "bat8blk" # [25] "bat12blk" "C3a2w6" "L3m1Ce" "C5a2w1" "L1a1w1" "H5s2w1" "ABLK1w6" "H2s2w7" "H1a1w1" "L5s2w1" "H3m1Ce" "L6a1w6" # [37] "L3s2w7" "H3m1T01" "C2a2w6" "bfre3T13" "H1m1T01" "C4a2w1" "L2m1Ce" "C6m2Ce" "H4s2w1" "H5m1T16" "H3m1T16" "C6m3Ce" # [49] "H6a1w6" "H3s2w7" "L4s2w1" "H1m1T16" "H5m1T01" "H2m1Ce" "H1m2T01" "C1m3T16" "C4a1w1" "L2m2Ce" "C6m1Ce" "C5m3T01" # [61] "H4s1w1" "H5m2T16" "L6a2w6" "L3s1w7" "H3m2T01" "C3m3T16" "H2m3Ce" "C2a1w6" "C1m3T01" "L4s1w1" "H1m2T16" "bat14blk" # [73] "H5m2T01" "C5m3T16" "H2m2Ce" "C3m3T01" "H3m2T16" "L2m3Ce" "Rem23" "H6a2w6" "H3s1w7" "L2m2T16" "bat4blk" "L6m2T01" # [85] "Rem36" "L1s2w7" "L4a1w6" "L2a1w1" "H6s2w1" "C4m2Ce" "L4m2T01" "C6a2w1" "L2m2T01" "H1s2w7" "H4a1w6" "C4m3Ce" # [97] "L6m2T16" "Clr39" "H2a1w1" "L6s2w1" "L4m2T16" "L2a2w1" "H6s1w1" "C4m1Ce" "Rem13" "L4m1T01" "C6a1w1" "L2m1T16" # [109] "bat7blk" "L1s1w7" "L6m1T01" "L4a2w6" "H2a2w1" "L6s1w1" "L4m1T16" "H1s1w7" "L2m1T01" "H4a2w6" "L6m1T16" "C5m1Ce" # [121] "L3a2w1" "C2m1T01" "C6m1T16" "L1m2Ce" "Clr16" "H1m3Ce" "C1a1w6" "L5a2w6" "C4m1T16" "C2m1T16" "H1m2Ce" "Rem19" # [133] "H3a2w1" "C6m1T01" "H5a2w6" "C4m1T01" "L1m3Ce" "C1a2w6" "L5a1w6" "C4m2T16" "H4m3T01" "C5m2Ce" "H2m3T16" "C2m2T01" # [145] "L3a1w1" "C6m2T16" "Clr33" "H6m3T01" "L1m1Ce" "C5m3Ce" "H5a1w6" "H4m3T16" "C4m2T01" "C2m2T16" "H2m3T01" "H1m1Ce" # [157] "bat1blk" "H6m3T16" "C6m2T01" "H3a1w1" "C2a2w1" "L4m1Ce" "L3m2T01" "H2s2w1" "bfre2T5" "L6a1w1" "bat10blk" "L5m2T16" # [169] "L5s2w7" "Rem16" "C4a2w6" "L1m2T01" "L2s2w1" "H6a1w1" "Clr19" "L3m2T16" "H4m1Ce" "L5m2T01" "H5s2w7" "L1m2T16" # [181] "bfre3T5" "L5m1T16" "bat13blk" "bat9blk" "L5s1w7" "H4m3Ce" "C4a1w6" "L1m1T01" "C2a1w1" "L4m2Ce" "L3m1T01" "Rem33" # [193] "H2s1w1" "bfre1T5" "L6a2w1" "L4m3Ce" "L5m1T01" "L1m1T16" "H5s1w7" "L2s1w1" "H6a2w1" "L3m1T16" "H4m2Ce" "L4s1w7" # [205] "L1a2w6" "C5a1w6" "Clr36" "H5m3Ce" "C3m1T16" "L5m2Ce" "C5m1T01" "C3a1w1" "bfre2T13" "H3s1w1" "C1m1T16" "C1m1Ce" # [217] "L5m3Ce" "C3m1T01" "H4s1w7" "H1a2w6" "bat16blk" "L3s1w1" "C5m1T16" "Rem39" "H5m2Ce" "C1m1T01" "ABLK2w1" "Clr13" # [229] "L5m1Ce" "C5m2T01" "C3a2w1" "H5m3T16" "bfre1T13" "H1m3T01" "C1m2T16" "H3s2w1" "C1m2Ce" "L4s2w7" "L1a1w6" "C5a2w6" # [241] "H3m3T01" "C3m2T16" "H5m3T01" "bat15blk" "C5m2T16" "L3s2w1" "H5m1Ce" "C1m2T01" "ABLK1w1" "H1m3T16" "C3m2T01" "H3m3T16" # [253] "H4s2w7" "H1a1w6" "C1m3Ce" "bat5blk" "L6m3T01" "L3a2w6" "L2m3T16" "L6s1w7" "L4m3T01" "C3m1Ce" "H1s1w1" "L5a2w1" # [265] "C1a1w1" "H3a2w6" "L6m3T16" "H6s1w7" "L2m3T01" "L4m3T16" "L1s1w1" "H5a2w1" "C3m2Ce" "H1s2w1" "L5a1w1" "C1a2w1" # [277] "bat6blk" "L3a1w6" "L6s2w7" "L1s2w1" "H5a1w1" "C3m3Ce" "H3a1w6" "Clr23" "H6s2w7" "L4a1w1" "H6m1T01" "C2m2Ce" # [289] "L6m1Ce" "H2m1T16" "C6a2w6" "H4m1T01" "Clr29" "L2a1w6" "H6m1Ce" "H6m1T16" "Rem26" "H4a1w1" "H2m1T01" "H4m1T16" # [301] "H2a1w6" "C2m3Ce" "C6a1w6" "H6m3Ce" "C4m3T16" "H4m2T01" "L2a2w6" "C6m3T16" "L4a2w1" "H6m2T01" "C2m1Ce" "L6m2Ce" # [313] "H2m2T16" "C2m3T01" "H4m2T16" "C4m3T01" "H2a2w6" "L6m3Ce" "H6m2Ce" "H6m2T16" "C6m3T01" "C2m3T16" "H4a2w1" "H2m2T01" ## populate: samp$samp_type sel <- grep(pattern = "s", x = samp$sample) # inspect samp$sample[sel] # set values samp$samp_type[sel] <- "soil" sel1 <- grep(pattern = "m", x = samp$sample) # inspect samp$sample[sel1] sel2 <- grep(pattern = "Rem", x = samp$sample) samp$sample[sel2] samp$samp_type[sel1] <- "fecal" # overwrite samp$samp_type[sel2] <- "soil" sel1 <- grep(pattern = "a", x = samp$sample) sel2 <- grep(pattern = "bat", x = samp$sample) samp$sample[sel1] samp$sample[sel2] # set values samp$samp_type[sel1] <- "air" # overwrite samp$samp_type[sel2] <- "extraction blank" sel <- grep(pattern = "ABLK", x = samp$sample) # inspect samp$sample[sel] # set values samp$samp_type[sel] <- "air sampling blank" sel <- grep(pattern = "bfre", x = samp$sample) # inspect samp$sample[sel] # set values samp$samp_type[sel] <- "fresh bedding" sel <- grep(pattern = "Clr", x = samp$sample) # inspect samp$sample[sel] # set values samp$samp_type[sel] <- "soil" sel <- grep(pattern = "Ce", x = samp$sample) # inspect samp$sample[sel] # set values samp$samp_type[sel] <- "cecal" sel <- which(is.na(samp$samp_type)) # empty ## populate: samp$mouseID sel <- which(samp$samp_type %in% c("fecal","cecal")) samp$mouseID[sel] <- samp$sample[sel] # only use first 4 characters samp$mouseID[sel] <- substr(x = samp$mouseID[sel], start = 1, stop = 4) temp <- samp ## populate: samp$Treatment sel <- which( substr(samp$sample, 1, 1) == "H" | substr(samp$sample, 1, 3) == "Rem" ) # inspect samp$sample[sel] samp$Treatment[sel] <- "High" sel <- which( substr(samp$sample, 1, 1) == "L" | substr(samp$sample, 1, 3) == "Clr" ) # inspect samp$sample[sel] samp$Treatment[sel] <- "Low" sel <- which( substr(samp$sample, 1, 2) %in% c("C1","C2","C3","C4","C5","C6") ) # inspect samp$sample[sel] samp$Treatment[sel] <- "Control" table(samp$Treatment) # Control High Low # 78 111 111 # note extra samples for High & Low are associated with source soils ## populate: samp$Time sel <- grep(pattern = "T01", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 0" sel <- grep(pattern = "T16", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 7" sel <- grep(pattern = "Rem", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Source soil" sel <- grep(pattern = "Clr", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Source soil" sel <- grep(pattern = "w1", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 1" sel <- grep(pattern = "T5", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 2" sel <- grep(pattern = "T13", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 6" sel <- grep(pattern = "w6", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 6" sel <- grep(pattern = "T16", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 7" sel <- grep(pattern = "w7", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Week 7" sel <- grep(pattern = "Ce", x = samp$sample) # inspect samp$sample[sel] # set values samp$Time[sel] <- "Post-exposure" temp <- samp str(samp) # 'data.frame': 324 obs. of 5 variables: # $ sample : chr "L3m2Ce" "L3m3T01" "C5a1w1" "L1a2w1" ... # $ samp_type: chr "cecal" "fecal" "air" "air" ... # $ mouseID : chr "L3m2" "L3m3" NA NA ... # $ Treatment: chr "Low" "Low" "Control" "Low" ... # $ Time : chr "Post-exposure" "Week 0" "Week 1" "Week 1" ... ## merge with 'mice_info' names(samp) # "sample" "samp_type" "mouseID" "Treatment" "Time" names(mice_info) # [1] "mouseID" "Animal no" "Sex" # [4] "Litter" "Date of Birth" "Cage no" # [7] "Cage Name" "Treatment" "Rack" # [10] "Ear notch ID" "Ear notch pos" "Age_received_20_Aug" # [13] "Age_first_soil_exposure_27_Aug" "Rack_position" samp.join <- merge(x = samp, y = mice_info, by = "mouseID", all.x = TRUE) # mouseID, Sex, Litter, Cage Name, Treatment, Rack, Rack_position ### Combine SAMPDATA into phyloseq object row.names(samp.join) <- samp.join$sample head(samp.join) # tidy-up dataframe sel <- which(names(samp.join)=="Treatment.y") samp.join <- samp.join[ ,-sel] sel <- which(names(samp.join)=="Treatment.x") names(samp.join)[sel] <- "Treatment" ## fill in Cage Name - for soil and air and not Clr or Rem sel <- which( substr(samp.join$sample, 1, 2) %in% c("L1","L2","L3","L4","L5","L6","H1","H2","H3","H4","H5","H6") ) samp.join$sample[sel] samp.join$`Cage Name`[sel] <- substr(samp.join$sample, 1, 2)[sel] SAMP.16s <- sample_data(samp.join) phy0.16s <- merge_phyloseq(phy0.16s, SAMP.16s) phy0.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10484 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 10484 taxa by 7 taxonomic ranks ] #------------------------ #### Data cleaning and removal of contaminant taxa - using decontam R package #------------------------ phy0.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10484 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 10484 taxa by 7 taxonomic ranks ] ## remove taxa not assigned as Bacteria levels(factor(tax_table(phy0.16s)[, "Kingdom"])) # "k__Archaea" "k__Bacteria" "Unclassified" rem_taxa <- which(tax_table(phy0.16s)[, "Kingdom"] %in% c("k__Archaea","Unclassified") ) # qty 31 phy1.16s <- prune_taxa(phy0.16s, taxa = row.names(tax_table(phy0.16s)[-rem_taxa, ]) ) phy1.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10453 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 10453 taxa by 7 taxonomic ranks ] ## remove taxa not assigned at the phylum level rem_taxa <- which(tax_table(phy1.16s)[, "Phylum"] == "p__unknown") # qty 134 phy1.16s <- prune_taxa(phy1.16s, taxa = row.names(tax_table(phy1.16s)[-rem_taxa, ]) ) phy1.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10319 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 10319 taxa by 7 taxonomic ranks ] rank_names(phy1.16s) # "Kingdom" "Phylum" "Class" "Order" "Family" "Genus" "Species" sort( as.character( unique( tax_table(phy1.16s)[, "Phylum"] ) )) sort( as.character( unique( tax_table(phy1.16s)[, "Class"] ) )) sort( as.character( unique( tax_table(phy1.16s)[, "Order"] ) )) sort( as.character( unique( tax_table(phy1.16s)[, "Family"] ) )) sort( as.character( unique( tax_table(phy1.16s)[, "Genus"] ) )) ## remove taxa associated with chloroplast, streptophyta, and mitochondria rem_taxa1 <- which(tax_table(phy1.16s)[, "Class"] == "c__Chloroplast") # qty 42 OTUs rem_taxa2 <- which(tax_table(phy1.16s)[, "Order"] == "o__Streptophyta") # qty 8 OTUs rem_taxa3 <- which(tax_table(phy1.16s)[, "Family"] == "f__mitochondria") # qty 33 OTUs c(rem_taxa1,rem_taxa2,rem_taxa3) # [1] 207 592 1361 2351 2629 2697 2752 3171 3225 3537 3546 3552 3563 4019 4203 4257 4364 4489 # [19] 4506 5073 5080 5401 5581 5729 5918 6055 6057 7035 7401 7422 7476 7543 7685 7711 7753 7934 # [37] 8791 8813 8831 9446 9954 10129 207 592 2351 2697 2752 4489 5581 6057 652 1071 1611 1616 # [55] 1643 2148 3132 3164 3265 3565 3940 4351 4714 4719 5190 5679 5856 6107 6895 7113 7273 7764 # [73] 8066 8171 8413 9210 9501 9560 9634 9689 9964 10253 10268 length( c(rem_taxa1,rem_taxa2,rem_taxa3) ) # 83 length( unique(c(rem_taxa1,rem_taxa2,rem_taxa3)) ) # 75 length( unique(c(rem_taxa1,rem_taxa2)) ) # 42 ... i.e. overlap between "o__Streptophyta" and "c__Chloroplast" unique(c(rem_taxa1,rem_taxa2,rem_taxa3)) # [1] 207 592 1361 2351 2629 2697 2752 3171 3225 3537 3546 3552 3563 4019 4203 4257 4364 4489 # [19] 4506 5073 5080 5401 5581 5729 5918 6055 6057 7035 7401 7422 7476 7543 7685 7711 7753 7934 # [37] 8791 8813 8831 9446 9954 10129 652 1071 1611 1616 1643 2148 3132 3164 3265 3565 3940 4351 # [55] 4714 4719 5190 5679 5856 6107 6895 7113 7273 7764 8066 8171 8413 9210 9501 9560 9634 9689 # [73] 9964 10253 10268 # remove chloroplast and mitochondria phy1.16s <- prune_taxa(phy1.16s, taxa = row.names(tax_table(phy1.16s)[-unique(c(rem_taxa1,rem_taxa2,rem_taxa3)), ]) ) phy1.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10244 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 10244 taxa by 7 taxonomic ranks ] min( taxa_sums(phy1.16s) ) # 2 temp <- phy1.16s ## remove taxa that do not occur in at least two samples trim.16s <- prune_taxa( taxa = apply( otu_table(phy1.16s), 1, function(x) {sum(x > 0) >= 2 }), x = phy1.16s) trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 8039 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 8039 taxa by 7 taxonomic ranks ] ## remove taxa if <50 sequence reads across samples trim.16s <- prune_taxa(taxa = taxa_sums(trim.16s) >= 50 , x = trim.16s) trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 3474 taxa and 324 samples ] # sample_data() Sample Data: [ 324 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 3474 taxa by 7 taxonomic ranks ] ntaxa(trim.16s) # 3474 nsamples(trim.16s) # 324 min(sample_sums(trim.16s)) # 4 max(sample_sums(trim.16s)) # 153088 hist( sample_sums(trim.16s) ) sort( sample_sums(trim.16s) )[1:60] # L1m2T16 H4s2w1 L2m3Ce bat13blk ABLK2w6 bat8blk ABLK1w6 bat12blk ABLK1w1 bfre1T5 bat7blk bat14blk ABLK2w1 # 4 22 31 116 160 231 231 369 450 606 669 691 839 # bat9blk bfre1T13 L3a2w6 bfre3T5 H6a2w6 C2a2w6 L4a1w6 L3a1w6 L5a1w6 L2a1w6 L2a2w6 bat15blk bfre2T13 # 1046 1090 1307 1351 1422 1681 1773 1777 2032 2270 2289 2776 2844 # H3a1w6 L5a2w6 H4a2w6 H6a1w6 L1a1w6 C3a1w1 L6a2w6 H4a1w6 C2a1w6 bfre3T13 bfre2T5 H3a2w6 H1a2w6 # 3031 3034 3232 3554 3587 3645 4016 4134 4334 4524 4676 5150 5188 # C6a1w6 C5a1w6 H1a1w6 C6a2w6 bat16blk L4a2w6 C4a1w6 H4a2w1 L6a1w6 L1a2w6 C4a2w6 L5a2w1 H2a2w1 # 5419 6994 7494 7761 7928 8783 8926 8983 10220 10487 10582 10684 11077 # C3a2w6 bat11blk H4s1w1 bat1blk H4a1w1 H6a2w1 C1a1w6 H4m3T01 # 12045 12564 13901 14075 14235 14818 15241 15298 # guide to use of R decontam package here: # https://benjjneb.github.io/decontam/vignettes/decontam_intro.html ## remove non-representative/failed low biomass samples sort(sample_sums(trim.16s))[1:10] # L1m2T16 H4s2w1 L2m3Ce bat13blk ABLK2w6 bat8blk ABLK1w6 bat12blk ABLK1w1 bfre1T5 # 4 22 31 116 160 231 231 369 450 606 ## these sample failed; they should be high biomass sequence read counts: ## L1m2T16 H4s2w1 L2m3Ce ## therefore omit these samples: ## L1m2T16 = fecal mouse#2 enclosure L1 time16 (week 7) ## H4s2w1 = soil subsample#2 enclosure H4 week 1 ## L2m3Ce = cecal sample mouse#3 enclosure L2 length(sample_names(trim.16s)) # 324 sel <- which(sample_names(trim.16s) %in% c("L1m2T16", "H4s2w1", "L2m3Ce")) ok_samples <- sample_names(trim.16s)[-sel] length(ok_samples) # 321 ok.trim.16s <- prune_samples( sample_names(trim.16s) %in% ok_samples, trim.16s ) ok.trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 3474 taxa and 321 samples ] # sample_data() Sample Data: [ 321 samples by 17 sample variables ] # tax_table() Taxonomy Table: [ 3474 taxa by 7 taxonomic ranks ] min(sample_sums(ok.trim.16s)) # 116 min(taxa_sums(ok.trim.16s)) # 50 table( ok.trim.16s@sam_data$samp_type ) # air air sampling blank cecal extraction blank # 72 4 53 14 # fecal fresh bedding soil # 107 6 65 unique( ok.trim.16s@sam_data$samp_type ) # [1] "cecal" "fecal" "air" "soil" "extraction blank" # [6] "air sampling blank" "fresh bedding" ## create new variable in sample data called 'Sample_or_Control', containing: # - True Sample = study sample # - Control Sample = negative control ok.trim.16s@sam_data$Sample_or_Control <- "True Sample" # then over-write with Controls sel <- which(ok.trim.16s@sam_data$samp_type %in% c("extraction blank", "air sampling blank")) ok.trim.16s@sam_data$Sample_or_Control[sel] <- "Control Sample" # summarize Sample_or_Control as logical variable, with TRUE for control samples # as required by isContaminant() sample_data(ok.trim.16s)$is.neg <- sample_data(ok.trim.16s)$Sample_or_Control == "Control Sample" ## Add concentration data quants <- read_excel(path= paste0(datadir,"/","quant-readings.xlsx"), sheet=1, range="A1:B325", col_names = TRUE) quants <- as.data.frame(quants) str(quants) # 'data.frame': 324 obs. of 2 variables: # $ Sample_Name : chr "Rem13" "Rem16" "Rem19" "Rem23" ... # $ Conc_ng_per_uL: num 10 10 10 10 10 10 10 10 10 10 ... # create variable quant_reading to hold concentration information ok.trim.16s@sam_data$quant_reading <- NA ## join concentration info to variable $quant_reading for (i in 1:length(ok.trim.16s@sam_data$sample)) { sel <- which(quants$Sample_Name == ok.trim.16s@sam_data$sample[i] ) if (length(sel)==1) { ok.trim.16s@sam_data$quant_reading[i] <- quants$Conc_ng_per_uL[sel] } } # inspect first 10 rows ok.trim.16s@sam_data[ , c("sample","quant_reading", "Sample_or_Control", "is.neg")][1:10, ] # Sample Data: [10 samples by 4 sample variables]: # sample quant_reading Sample_or_Control is.neg # L3m2Ce L3m2Ce 10.0000 True Sample FALSE # L3m3T01 L3m3T01 9.0000 True Sample FALSE # C5a1w1 C5a1w1 0.0141 True Sample FALSE # L1a2w1 L1a2w1 0.0113 True Sample FALSE # H5s1w1 H5s1w1 10.0000 True Sample FALSE # L2s1w7 L2s1w7 10.0000 True Sample FALSE # L1m3T01 L1m3T01 10.0000 True Sample FALSE # bat11blk bat11blk 0.0010 Control Sample TRUE # L5m3T16 L5m3T16 10.0000 True Sample FALSE # C3a1w6 C3a1w6 0.0199 True Sample FALSE # inspect data dim(ok.trim.16s@sam_data) # 321 20 ok.trim.16s@sam_data[ 1:200 , c("sample","quant_reading", "Sample_or_Control", "is.neg")] ok.trim.16s@sam_data[ 201:321 , c("sample","quant_reading", "Sample_or_Control", "is.neg")] ## inspect library sizes in each sample, as a function of whether that sample # was a true positive sample or a negative control df <- as.data.frame(sample_data(ok.trim.16s)) df$LibrarySize <- sample_sums(ok.trim.16s) df <- df[order(df$LibrarySize),] df$Index <- seq(nrow(df)) ggplot(data=df, aes(x=Index, y=LibrarySize, color=Sample_or_Control)) + geom_point() ### Split contaminant analysis, treating 'low biomass' and 'high biomass' samples separately ## Per user documentation for R decontam package: ## a) Assess low biomass using 'isNotContaminant()' function ## b) Assess high biomass using 'isContaminant()' function ## c) then pool all contaminants and remove from subsequent microbiome data analysis ### a) Assess low biomass: air, air sampling blanks, fresh bedding, and corresponding extraction blanks # Air sel <- which(ok.trim.16s@sam_data$samp_type == "air") lb.air <- ok.trim.16s@sam_data$sample[sel] # Fresh bedding sel <- which(ok.trim.16s@sam_data$samp_type == "fresh bedding") lb.fb <- ok.trim.16s@sam_data$sample[sel] # Air sampling blanks sel <- which(ok.trim.16s@sam_data$samp_type == "air sampling blank") lb.air.blk <- ok.trim.16s@sam_data$sample[sel] # Extraction blanks sel <- which(ok.trim.16s@sam_data$sample %in% c("bat8blk", "bat9blk", "bat13blk", "bat14blk", "bat12blk")) # extraction blank for fresh bedding samples & soil (below) lb.ext.blk <- ok.trim.16s@sam_data$sample[sel] lb_samples <- c(lb.air,lb.fb,lb.air.blk,lb.ext.blk) lb_samples # [1] "C5a1w1" "L1a2w1" "C3a1w6" "H1a2w1" "C3a2w6" "C5a2w1" "L1a1w1" "H1a1w1" "L6a1w6" "C2a2w6" # [11] "C4a2w1" "H6a1w6" "C4a1w1" "L6a2w6" "C2a1w6" "H6a2w6" "L4a1w6" "L2a1w1" "C6a2w1" "H4a1w6" # [21] "H2a1w1" "L2a2w1" "C6a1w1" "L4a2w6" "H2a2w1" "H4a2w6" "L3a2w1" "C1a1w6" "L5a2w6" "H3a2w1" # [31] "H5a2w6" "C1a2w6" "L5a1w6" "L3a1w1" "H5a1w6" "H3a1w1" "C2a2w1" "L6a1w1" "C4a2w6" "H6a1w1" # [41] "C4a1w6" "C2a1w1" "L6a2w1" "H6a2w1" "L1a2w6" "C5a1w6" "C3a1w1" "H1a2w6" "C3a2w1" "L1a1w6" # [51] "C5a2w6" "H1a1w6" "L3a2w6" "L5a2w1" "C1a1w1" "H3a2w6" "H5a2w1" "L5a1w1" "C1a2w1" "L3a1w6" # [61] "H5a1w1" "H3a1w6" "L4a1w1" "C6a2w6" "L2a1w6" "H4a1w1" "H2a1w6" "C6a1w6" "L2a2w6" "L4a2w1" # [71] "H2a2w6" "H4a2w1" "bfre3T13" "bfre2T5" "bfre3T5" "bfre1T5" "bfre2T13" "bfre1T13" "ABLK2w6" "ABLK1w6" # [81] "ABLK2w1" "ABLK1w1" "bat8blk" "bat12blk" "bat14blk" "bat13blk" "bat9blk" lb.ok.trim.16s <- prune_samples( sample_names(ok.trim.16s) %in% lb_samples, ok.trim.16s ) lb.ok.trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 3474 taxa and 87 samples ] # sample_data() Sample Data: [ 87 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 3474 taxa by 7 taxonomic ranks ] min(sample_sums(lb.ok.trim.16s)) # 116 min(taxa_sums(lb.ok.trim.16s)) # 0 # prune taxa that have zero sequence reads lb.ok.trim.16s <- prune_taxa(taxa = taxa_sums(lb.ok.trim.16s) > 0, x = lb.ok.trim.16s) lb.ok.trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2811 taxa and 87 samples ] # sample_data() Sample Data: [ 87 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2811 taxa by 7 taxonomic ranks ] ## need inputs to function: # isNotContaminant(seqtab = , neg = , # method = "prevalence", threshold = 0.5, normalize = TRUE, detailed = FALSE) mat <- as.matrix(as.data.frame(t(lb.ok.trim.16s@otu_table))) # requires a feature table recording the observed abundances of each sequence (or OTU) # in each sample. Rows should correspond to samples, and columns to sequences (or OTUs). class(mat) # "matrix" mat[1:5, 1:5] # OTU_1 OTU_2 OTU_3 OTU_4 OTU_5 # C5a1w1 1185 211 3449 2007 111 # L1a2w1 445 0 678 57 123 # C3a1w6 1769 244 5575 3822 636 # H1a2w1 153 132 64 84 352 # ABLK2w6 1 2 1 0 1 lb.NOT.contam <- isNotContaminant(seqtab = mat, neg = lb.ok.trim.16s@sam_data$is.neg, method = "prevalence", threshold = 0.5, normalize = TRUE, detailed = TRUE) # with detailed = TRUE, returns a data.frame containing diagnostic information on # the non-contaminant decision. head(lb.NOT.contam) # freq prev p.freq p.prev p not.contaminant # OTU_1 0.031961951 78 NA 1.878745e-04 1.878745e-04 TRUE # OTU_2 0.007202436 67 NA 2.160981e-03 2.160981e-03 TRUE # OTU_3 0.087721566 84 NA 3.962451e-04 3.962451e-04 TRUE # OTU_4 0.048224660 77 NA 1.566598e-05 1.566598e-05 TRUE # OTU_5 0.012677907 77 NA 3.630779e-04 3.630779e-04 TRUE # OTU_6 0.003664329 35 NA 1.361211e-01 1.361211e-01 TRUE table(lb.NOT.contam$not.contaminant) # FALSE TRUE # 726 2085 lb.taxa.NotContam <- row.names(lb.NOT.contam)[which(lb.NOT.contam$not.contaminant==TRUE)] lb.taxa.NotContam <- sort(lb.taxa.NotContam) lb.taxa.IsContam <- row.names(lb.NOT.contam)[which(lb.NOT.contam$not.contaminant==FALSE)] lb.taxa.IsContam <- sort(lb.taxa.IsContam) ### b) Assess high biomass: soil, cecal, fecal, and corresponding extraction blanks unique( ok.trim.16s@sam_data$samp_type ) # Soil sel <- which(ok.trim.16s@sam_data$samp_type == "soil") hb.soil <- ok.trim.16s@sam_data$sample[sel] # Cecal sel <- which(ok.trim.16s@sam_data$samp_type == "cecal") hb.cecal <- ok.trim.16s@sam_data$sample[sel] # Fecal sel <- which(ok.trim.16s@sam_data$samp_type == "fecal") hb.fecal <- ok.trim.16s@sam_data$sample[sel] # Extraction blanks sel <- which(ok.trim.16s@sam_data$sample %in% c( "bat1blk", "bat4blk", "bat5blk", "bat6blk", "bat7blk", "bat10blk", "bat11blk", "bat12blk", # extraction blank for both fresh bedding & soil "bat15blk", "bat16blk")) hb.ext.blk <- ok.trim.16s@sam_data$sample[sel] hb_samples <- c(hb.soil,hb.cecal,hb.fecal,hb.ext.blk) hb_samples # [1] "H5s1w1" "L2s1w7" "Rem29" "L5s1w1" "Clr26" "H2s1w7" "L2s2w7" "H5s2w1" "H2s2w7" "L5s2w1" # [11] "L3s2w7" "H3s2w7" "L4s2w1" "H4s1w1" "L3s1w7" "L4s1w1" "Rem23" "H3s1w7" "Rem36" "L1s2w7" # [21] "H6s2w1" "H1s2w7" "Clr39" "L6s2w1" "H6s1w1" "Rem13" "L1s1w7" "L6s1w1" "H1s1w7" "Clr16" # [31] "Rem19" "Clr33" "H2s2w1" "L5s2w7" "Rem16" "L2s2w1" "Clr19" "H5s2w7" "L5s1w7" "Rem33" # [41] "H2s1w1" "H5s1w7" "L2s1w1" "L4s1w7" "Clr36" "H3s1w1" "H4s1w7" "L3s1w1" "Rem39" "Clr13" # [51] "H3s2w1" "L4s2w7" "L3s2w1" "H4s2w7" "L6s1w7" "H1s1w1" "H6s1w7" "L1s1w1" "H1s2w1" "L6s2w7" # [61] "L1s2w1" "Clr23" "H6s2w7" "Clr29" "Rem26" "L3m2Ce" "H3m3Ce" "H3m2Ce" "L3m3Ce" "L3m1Ce" # [71] "H3m1Ce" "L2m1Ce" "C6m2Ce" "C6m3Ce" "H2m1Ce" "L2m2Ce" "C6m1Ce" "H2m3Ce" "H2m2Ce" "C4m2Ce" # [81] "C4m3Ce" "C4m1Ce" "C5m1Ce" "L1m2Ce" "H1m3Ce" "H1m2Ce" "L1m3Ce" "C5m2Ce" "L1m1Ce" "C5m3Ce" # [91] "H1m1Ce" "L4m1Ce" "H4m1Ce" "H4m3Ce" "L4m2Ce" "L4m3Ce" "H4m2Ce" "H5m3Ce" "L5m2Ce" "C1m1Ce" # [101] "L5m3Ce" "H5m2Ce" "L5m1Ce" "C1m2Ce" "H5m1Ce" "C1m3Ce" "C3m1Ce" "C3m2Ce" "C3m3Ce" "C2m2Ce" # [111] "L6m1Ce" "H6m1Ce" "C2m3Ce" "H6m3Ce" "C2m1Ce" "L6m2Ce" "L6m3Ce" "H6m2Ce" "L3m3T01" "L1m3T01" # [121] "L5m3T16" "L3m3T16" "L1m3T16" "L5m3T01" "H3m1T01" "H1m1T01" "H5m1T16" "H3m1T16" "H1m1T16" "H5m1T01" # [131] "H1m2T01" "C1m3T16" "C5m3T01" "H5m2T16" "H3m2T01" "C3m3T16" "C1m3T01" "H1m2T16" "H5m2T01" "C5m3T16" # [141] "C3m3T01" "H3m2T16" "L2m2T16" "L6m2T01" "L4m2T01" "L2m2T01" "L6m2T16" "L4m2T16" "L4m1T01" "L2m1T16" # [151] "L6m1T01" "L4m1T16" "L2m1T01" "L6m1T16" "C2m1T01" "C6m1T16" "C4m1T16" "C2m1T16" "C6m1T01" "C4m1T01" # [161] "C4m2T16" "H4m3T01" "H2m3T16" "C2m2T01" "C6m2T16" "H6m3T01" "H4m3T16" "C4m2T01" "C2m2T16" "H2m3T01" # [171] "H6m3T16" "C6m2T01" "L3m2T01" "L5m2T16" "L1m2T01" "L3m2T16" "L5m2T01" "L5m1T16" "L1m1T01" "L3m1T01" # [181] "L5m1T01" "L1m1T16" "L3m1T16" "C3m1T16" "C5m1T01" "C1m1T16" "C3m1T01" "C5m1T16" "C1m1T01" "C5m2T01" # [191] "H5m3T16" "H1m3T01" "C1m2T16" "H3m3T01" "C3m2T16" "H5m3T01" "C5m2T16" "C1m2T01" "H1m3T16" "C3m2T01" # [201] "H3m3T16" "L6m3T01" "L2m3T16" "L4m3T01" "L6m3T16" "L2m3T01" "L4m3T16" "H6m1T01" "H2m1T16" "H4m1T01" # [211] "H6m1T16" "H2m1T01" "H4m1T16" "C4m3T16" "H4m2T01" "C6m3T16" "H6m2T01" "H2m2T16" "C2m3T01" "H4m2T16" # [221] "C4m3T01" "H6m2T16" "C6m3T01" "C2m3T16" "H2m2T01" "bat11blk" "bat12blk" "bat4blk" "bat7blk" "bat1blk" # [231] "bat10blk" "bat16blk" "bat15blk" "bat5blk" "bat6blk" hb.ok.trim.16s <- prune_samples( sample_names(ok.trim.16s) %in% hb_samples, ok.trim.16s ) hb.ok.trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 3474 taxa and 235 samples ] # sample_data() Sample Data: [ 235 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 3474 taxa by 7 taxonomic ranks ] min(sample_sums(hb.ok.trim.16s)) # 369 min(taxa_sums(hb.ok.trim.16s)) # 0 # prune taxa that have zero sequence reads hb.ok.trim.16s <- prune_taxa(taxa = taxa_sums(hb.ok.trim.16s) > 0, x = hb.ok.trim.16s) hb.ok.trim.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 3414 taxa and 235 samples ] # sample_data() Sample Data: [ 235 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 3414 taxa by 7 taxonomic ranks ] min(taxa_sums(hb.ok.trim.16s)) # 1 head(hb.ok.trim.16s@sam_data[ , c("sample","quant_reading", "is.neg")]) # Sample Data: [6 samples by 3 sample variables]: # sample quant_reading is.neg # L3m2Ce L3m2Ce 1e+01 FALSE # L3m3T01 L3m3T01 9e+00 FALSE # H5s1w1 H5s1w1 1e+01 FALSE # L2s1w7 L2s1w7 1e+01 FALSE # L1m3T01 L1m3T01 1e+01 FALSE # bat11blk bat11blk 1e-03 TRUE ## run 'isContaminant()' function contamdf.either <- isContaminant(hb.ok.trim.16s, method="either", conc="quant_reading" , neg="is.neg", threshold = 0.1) # message: Using same threshold value for the frequency and prevalence contaminant identification. table(contamdf.either$contaminant) # FALSE TRUE # 3393 21 head(contamdf.either) # freq prev p.freq p.prev p contaminant # OTU_1 0.05286111 191 0.4949171 0.26549467 NA FALSE # OTU_2 0.05297538 201 0.4930715 0.69963695 NA FALSE # OTU_3 0.07024606 198 0.4667332 0.34384987 NA FALSE # OTU_4 0.02893894 179 0.4520002 0.41270659 NA FALSE # OTU_5 0.03054148 192 0.4658868 0.57778180 NA FALSE # OTU_6 0.01358069 135 0.5014358 0.07721348 NA TRUE hb.taxa.NotContam <- row.names(contamdf.either)[which(contamdf.either$contaminant==FALSE)] hb.taxa.NotContam <- sort(hb.taxa.NotContam) hb.taxa.IsContam <- row.names(contamdf.either)[which(contamdf.either$contaminant==TRUE)] hb.taxa.IsContam <- sort(hb.taxa.IsContam) ### c) pool contaminants and remove from subsequent microbiome data analysis # contaminants identified from low biomass samples lb.taxa.IsContam # contaminants identified from high biomass samples hb.taxa.IsContam # Do contaminant taxa overlap? length(lb.taxa.IsContam) # 726 length(hb.taxa.IsContam) # 21 length(c(lb.taxa.IsContam, hb.taxa.IsContam)) # 747 length(unique(c(lb.taxa.IsContam, hb.taxa.IsContam))) # 744 length(which(lb.taxa.IsContam %in% hb.taxa.IsContam)) # 3 OTUs are overlapping all.taxa.IsContam <- unique(c(lb.taxa.IsContam,hb.taxa.IsContam)) length(all.taxa.IsContam) # 744 ## remove blanks from ok.trim.16s unique(ok.trim.16s@sam_data$samp_type) # [1] "soil" "extraction blank" "cecal" "air" # [5] "fresh bedding" "fecal" "air sampling blank" clean.16s <- prune_samples(ok.trim.16s@sam_data$samp_type %in% c("soil", "cecal", "air", "fresh bedding", "fecal"), ok.trim.16s) ## then remove contaminant taxa sel.rm <- which(row.names(clean.16s@otu_table) %in% all.taxa.IsContam) # qty 744 keep_taxa <- row.names(clean.16s@otu_table)[-sel.rm] clean.16s <- prune_taxa(clean.16s, taxa = keep_taxa ) clean.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 303 samples ] # sample_data() Sample Data: [ 303 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(clean.16s)) # 345 sort(sample_sums(clean.16s))[1:10] # bfre1T5 bfre1T13 bfre3T5 L3a2w6 H6a2w6 C2a2w6 L3a1w6 L4a1w6 L5a1w6 bfre2T13 # 345 839 945 1307 1400 1681 1683 1719 1850 2179 min(taxa_sums(clean.16s)) # 9 #--------------------- #### Ordination of soil only samples #------------------------ unique(clean.16s@sam_data$samp_type) # [1] "cecal" "fecal" "air" "soil" "fresh bedding" soil.dc.16s <- prune_samples( clean.16s@sam_data$samp_type == "soil", clean.16s ) soil.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 65 samples ] # sample_data() Sample Data: [ 65 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(soil.dc.16s)) # 12524 min(taxa_sums(soil.dc.16s)) # 0 # prune taxa that have zero sequence reads soil.dc.16s <- prune_taxa(taxa = taxa_sums(soil.dc.16s) > 0, x = soil.dc.16s) soil.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2345 taxa and 65 samples ] # sample_data() Sample Data: [ 65 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2345 taxa by 7 taxonomic ranks ] sort( sample_sums(soil.dc.16s) ) # H4s1w1 Rem19 H5s2w1 H6s2w1 L3s2w7 H6s1w1 L4s1w1 Rem29 H3s2w7 Rem36 Clr29 L6s1w1 Rem39 Rem13 Rem26 H3s1w1 # 12524 14024 15241 15297 15550 15768 16289 16455 17208 18208 18381 18655 18872 19844 19869 19882 # L5s2w1 Clr26 L4s2w1 H1s2w1 L2s2w1 H2s1w1 Rem16 L1s2w1 Clr33 Rem33 Clr36 L2s1w1 L3s2w1 Clr23 L5s1w1 Clr19 # 20051 20398 20442 20668 20868 21389 22365 22497 22824 24000 24663 24841 25196 25230 25421 25964 # H2s2w1 H6s2w7 H3s2w1 H4s1w7 Clr39 H1s1w1 L1s1w1 L3s1w1 Clr16 L1s2w7 H1s1w7 Rem23 H1s2w7 H5s1w1 H2s2w7 L6s2w7 # 26225 26362 28360 28400 28739 29651 33379 33578 33940 34036 35401 36772 38175 38591 41192 43747 # H3s1w7 H5s2w7 H5s1w7 L4s1w7 H2s1w7 L6s2w1 L5s1w7 L6s1w7 L5s2w7 H4s2w7 L2s1w7 H6s1w7 L2s2w7 L4s2w7 L1s1w7 Clr13 # 44425 44807 45190 45254 45849 51242 51383 51424 51723 51882 53256 53557 54015 54825 55598 57129 # L3s1w7 # 63813 table( soil.dc.16s@sam_data$samp_type ) # soil # 65 table( soil.dc.16s@sam_data$Treatment ) # High Low # 32 33 table( soil.dc.16s@sam_data$Time ) # Source soil Week 1 Week 7 # 18 23 24 ## Use lowest number of reads to create rarefied dataset #set.seed(123) seed <- 123 rare.soil.dc.16s <- rarefy_even_depth(soil.dc.16s, sample.size = min(sample_sums(soil.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.soil.dc.16s) # all 12524 min( taxa_sums(rare.soil.dc.16s) ) # 1 ntaxa(rare.soil.dc.16s) # 2298 nsamples(rare.soil.dc.16s) # 65 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.soil.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.04704312 # Stress type 1, weak ties # No convergent solutions - best solution after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.soil.dc.16s@sam_data$samp_type # all "soil" rare.soil.dc.16s@sam_data$Time # either: "Source soil", "Week 1" , "Week 7" length(unique(rare.soil.dc.16s@sam_data$Treatment)) # 2 length(unique(rare.soil.dc.16s@sam_data$Cage.Name)) # 13 length(unique(rare.soil.dc.16s@sam_data$sample)) # 65 p <- plot_ordination(rare.soil.dc.16s, ord, type="samples", color="Treatment", shape="Time") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Time <- factor(p$data$Time, levels = c("Source soil", "Week 1", "Week 7"), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c("Source soil" = 15, "Week 1" = 16, "Week 7" = 17 ) pp <- p + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes) + theme_bw() + annotate(geom="text", x= 1.25, y= -0.35, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=1, size=3.25 ) + guides( color = guide_legend(order = 1), shape = guide_legend(order = 2) ) pp ## Minimise plot size ## default gggplot2 plot margins theme_bw()$plot.margin #[1] 5.5pt 5.5pt 5.5pt 5.5pt pp <- p + geom_point(size=0.8) + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes) + theme_bw() + #annotate(geom="text", x= 1.40, y= 0.375, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=1, size=2 ) + annotate(geom="text", x= 1.40, y= 0.375, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=1, size=2.5 ) + guides( color = guide_legend(order = 1), shape = guide_legend(order = 2) )+ #ggtitle("A") + labs(x = NULL, y = NULL) + theme( axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), plot.title=element_text(face = "bold", hjust = 0), plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), # https://ggplot2.tidyverse.org/reference/element.html #http://r-statistics.co/Complete-Ggplot2-Tutorial-Part2-Customizing-Theme-With-R-Code.html#Legend%20Positions legend.justification=c(1,0), legend.position=c(0.95, 0.05), legend.background = element_blank(), legend.key = element_blank(), #legend.key.size = unit(2, "pt"), legend.key.size = unit(2.5, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), #legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), #legend.margin=margin(t = 5.5, r = 2, b = 5.5, l = 0, unit='pt'), legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), #legend.box.spacing = unit(2, "pt"), legend.box.spacing = unit(0, "pt"), legend.box.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.spacing = unit(4, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) pp grid.text(label = "A" , x = unit(0.075, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Soil-only-1xRarefy-A-vFINAL.tiff"), width = 7, height = 5.95, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.soil.dc.16s <- phyloseq::distance(rare.soil.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.soil.dc.16s)) str(sampledf) # Adonis test set.seed(123) adonis(bray.rare.soil.dc.16s ~ Treatment, data = sampledf) # Call: # adonis(formula = bray.rare.soil.dc.16s ~ Treatment, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Treatment 1 4.3160 4.3160 75.445 0.54495 0.001 *** # Residuals 63 3.6041 0.0572 0.45505 # Total 64 7.9201 1.00000 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Homogeneity of dispersion test beta <- betadisper(bray.rare.soil.dc.16s, sampledf$Treatment) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 1 0.00101 0.001008 0.0724 999 0.78 # Residuals 63 0.87728 0.013925 table(sampledf$Treatment) # High Low # 32 33 table(sampledf$Time) # Source soil Week 1 Week 7 # 18 23 24 # #------------------------ #### Alpha diversity of ALL soils - including merged sample bootstrap resampling #------------------------ ## the function below uses merged-sample bootstrap resampling to estimate the ## overall site-level alpha diversity. ## merging by group for triplicates (source soils) and duplicates (treatment soils in trays) ## create function to rarefy for initial sample > merge samples by type > rarefy again > calculate shannon's index # # # # # # # # # # # # calc_AlphaDiv_in_parallel_any_merge_no <- function(phy_obj, merge_by ) { # rarefy seed <- 123+j r16s <- rarefy_even_depth(phy_obj, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) # merge samples merged.r16s <- merge_samples(r16s, group= eval(parse(text= paste0("r16s@sam_data$",merge_by))) ) # rarefy back to min sample size for this bootstrap sample - i.e. rarefied-merged-rarefied seed <- 234+j rmr16s <- rarefy_even_depth(merged.r16s, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) # - - - - - - - - - - ### ALPHA DIVERSITY ## calculate Shannon's index shan.rmr16s <- plot_richness(rmr16s, measures=c("Shannon")) # export data out <- data.frame(sample=shan.rmr16s$data$samples,shannon=shan.rmr16s$data$value, group=shan.rmr16s$data$samples) out$eff_no <- exp(out$shannon) out$calc_type <- "bootstrap" return(out) } # # # # # # # # # # # # ## apply this to previously decontaminated dataset: soil.dc.16s table( soil.dc.16s@sam_data$Time ) # Source soil Week 1 Week 7 # 18 23 24 names( soil.dc.16s@sam_data ) # [1] "mouseID" "sample" "samp_type" # [4] "Treatment" "Time" "Animal.no" # [7] "Sex" "Litter" "Date.of.Birth" # [10] "Cage.no" "Cage.Name" "Rack" # [13] "Ear.notch.ID" "Ear.notch.pos" "Age_received_20_Aug" # [16] "Age_first_soil_exposure_27_Aug" "Rack_position" "Sample_or_Control" # [19] "is.neg" "quant_reading" soil.dc.16s@sam_data[ ,c("sample","samp_type","Treatment","Time")] soil.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2345 taxa and 65 samples ] # sample_data() Sample Data: [ 65 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2345 taxa by 7 taxonomic ranks ] min(sample_sums(soil.dc.16s)) # 12524 min(taxa_sums(soil.dc.16s)) # 1 phy_obj <- soil.dc.16s phy_obj@sam_data[ ,c("sample","samp_type","Treatment","Time")] # create grouping variable for each site phy_obj@sam_data$group <- NA phy_obj@sam_data$group <- paste0( substring(phy_obj@sam_data$sample,1,2), substring(phy_obj@sam_data$sample,nchar(phy_obj@sam_data$sample)-1,nchar(phy_obj@sam_data$sample))) # then overwrite with ... sel <- grep(pattern = "Clr1", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Clr" sel <- grep(pattern = "Clr2", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Clr" sel <- grep(pattern = "Clr3", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Clr" sel <- grep(pattern = "Rem1", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Rem" sel <- grep(pattern = "Rem2", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Rem" sel <- grep(pattern = "Rem3", x = phy_obj@sam_data$sample) phy_obj@sam_data$group[sel] <- "Rem" head( phy_obj@sam_data[ ,c("sample","samp_type","Treatment","Time","group")] ) # Sample Data: [6 samples by 5 sample variables]: # sample samp_type Treatment Time group # H5s1w1 H5s1w1 soil High Week 1 H5w1 # L2s1w7 L2s1w7 soil Low Week 7 L2w7 # Rem29 Rem29 soil High Source soil Rem # L5s1w1 L5s1w1 soil Low Week 1 L5w1 # Clr26 Clr26 soil Low Source soil Clr # H2s1w7 H2s1w7 soil High Week 7 H2w7 length(unique(phy_obj@sam_data$group)) # 26 sort(unique(phy_obj@sam_data$group)) # [1] "Clr" "H1w1" "H1w7" "H2w1" "H2w7" "H3w1" "H3w7" "H4w1" "H4w7" "H5w1" "H5w7" "H6w1" "H6w7" "L1w1" "L1w7" # [16] "L2w1" "L2w7" "L3w1" "L3w7" "L4w1" "L4w7" "L5w1" "L5w7" "L6w1" "L6w7" "Rem" # bootstrap outputs will automatically be stored in a list: b_out # number of bootstrap resamples B <- 100 # iterate in parallel cl<-makeCluster( detectCores()-1 ) # detectCores()-1 registerDoParallel(cl) # remember to stopcluster() b_out <- foreach(j=1:B, .packages=c('phyloseq')) %dopar% calc_AlphaDiv_in_parallel_any_merge_no(phy_obj=phy_obj, merge_by="group" ) stopCluster(cl) length(b_out) # 100 names(b_out[[1]]) # "sample" "shannon" "group" "eff_no" "calc_type" dim(b_out[[1]]) # 26 5 b_out[[1]] # sample shannon group eff_no calc_type # 1 Clr 5.582610 Clr 265.7645 bootstrap # 2 H1w1 5.888738 H1w1 360.9494 bootstrap # 3 H1w7 5.922157 H1w7 373.2157 bootstrap # 4 H2w1 5.852293 H2w1 348.0315 bootstrap # 5 H2w7 5.866730 H2w7 353.0923 bootstrap # 6 H3w1 5.773728 H3w1 321.7349 bootstrap # 7 H3w7 5.908777 H3w7 368.2555 bootstrap # 8 H4w1 5.835952 H4w1 342.3906 bootstrap # 9 H4w7 5.910237 H4w7 368.7935 bootstrap # 10 H5w1 5.905837 H5w1 367.1743 bootstrap # 11 H5w7 5.914514 H5w7 370.3743 bootstrap # 12 H6w1 5.809601 H6w1 333.4859 bootstrap # 13 H6w7 5.872727 H6w7 355.2163 bootstrap # 14 L1w1 5.543545 L1w1 255.5825 bootstrap # 15 L1w7 5.163747 L1w7 174.8183 bootstrap # 16 L2w1 5.496611 L2w1 243.8641 bootstrap # 17 L2w7 5.103908 L2w7 164.6641 bootstrap # 18 L3w1 5.475788 L3w1 238.8385 bootstrap # 19 L3w7 5.194748 L3w7 180.3228 bootstrap # 20 L4w1 5.450656 L4w1 232.9110 bootstrap # 21 L4w7 5.140608 L4w7 170.8196 bootstrap # 22 L5w1 5.565565 L5w1 261.2727 bootstrap # 23 L5w7 5.144408 L5w7 171.4700 bootstrap # 24 L6w1 5.338649 L6w1 208.2311 bootstrap # 25 L6w7 5.090755 L6w7 162.5126 bootstrap # 26 Rem 5.866909 Rem 353.1557 bootstrap ### 1st calculate diversity from one rarefying step ### then append bootstrap-derived uncertainty...(?) # rarefy #1 seed <- 123 r1.16s <- rarefy_even_depth(phy_obj, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) shan.r1.16s <- plot_richness(r1.16s, measures=c("Shannon")) shan.r1.16s out <- data.frame(sample=shan.r1.16s$data$samples,shannon=shan.r1.16s$data$value, group=NA ) # out$eff_no <- exp(out$shannon) # calculate effective no of species out$calc_type <- "rarefyx1" str(out) ## fix "group" variable based on "sample" out$sample <- as.character(out$sample) out$group <- paste0( substring(out$sample,1,2), substring(out$sample,nchar(out$sample)-1,nchar(out$sample))) # then overwrite with ... sel <- grep(pattern = "Clr1", x = out$sample) out$group[sel] <- "Clr" sel <- grep(pattern = "Clr2", x = out$sample) out$group[sel] <- "Clr" sel <- grep(pattern = "Clr3", x = out$sample) out$group[sel] <- "Clr" sel <- grep(pattern = "Rem1", x = out$sample) out$group[sel] <- "Rem" sel <- grep(pattern = "Rem2", x = out$sample) out$group[sel] <- "Rem" sel <- grep(pattern = "Rem3", x = out$sample) out$group[sel] <- "Rem" str(out) # 'data.frame': 65 obs. of 5 variables: # $ sample : chr "H5s1w1" "L2s1w7" "Rem29" "L5s1w1" ... # $ shannon : num 5.86 5.06 5.8 5.56 5.09 ... # $ group : chr "H5w1" "L2w7" "Rem" "L5w1" ... # $ eff_no : num 349 157 331 261 162 ... # $ calc_type: chr "rarefyx1" "rarefyx1" "rarefyx1" "rarefyx1" ... names(out) # "sample" "shannon" "group" "eff_no" "calc_type" names(b_out[[1]]) # "sample" "shannon" "group" "eff_no" "calc_type" dim(b_out[[1]]) # 26 5 temp <- out for (j in 1:B) { temp <- rbind(temp,b_out[[j]]) } head(temp) temp[1:50, ] tail(temp) names(temp) # "sample" "shannon" "group" "eff_no" "calc_type" temp$Time <- substring(temp$group, nchar(temp$group)-1, nchar(temp$group)) # fix source soils: "em" "lr" sel <- which(temp$Time=="em") temp$Time[sel] <- "Source\nsoil" sel <- which(temp$Time=="lr") temp$Time[sel] <- "Source\nsoil" levels(factor(temp$Time)) # [1] "Source\nsoil" "w1" "w7" temp$Time <- factor(temp$Time, levels = c("Source\nsoil","w1","w7"), labels=c("Source\nsoil","Week 1","Week 7")) str(temp) # 'data.frame': 2665 obs. of 6 variables: # $ sample : chr "H5s1w1" "L2s1w7" "Rem29" "L5s1w1" ... # $ shannon : num 5.86 5.06 5.8 5.56 5.09 ... # $ group : chr "H5w1" "L2w7" "Rem" "L5w1" ... # $ eff_no : num 349 157 331 261 162 ... # $ calc_type: chr "rarefyx1" "rarefyx1" "rarefyx1" "rarefyx1" ... # $ Time : Factor w/ 3 levels "Source\nsoil",..: 2 3 1 2 1 3 3 2 3 2 ... levels(factor(temp$group)) # [1] "Clr" "H1w1" "H1w7" "H2w1" "H2w7" "H3w1" "H3w7" "H4w1" "H4w7" "H5w1" "H5w7" "H6w1" "H6w7" "L1w1" "L1w7" # [16] "L2w1" "L2w7" "L3w1" "L3w7" "L4w1" "L4w7" "L5w1" "L5w7" "L6w1" "L6w7" "Rem" temp$group <- factor(temp$group, levels = c("Clr","Rem", "L1w1", "L1w7", "L2w1", "L2w7", "L3w1", "L3w7", "L4w1", "L4w7", "L5w1", "L5w7", "L6w1", "L6w7", "H1w1", "H1w7", "H2w1", "H2w7", "H3w1", "H3w7", "H4w1", "H4w7", "H5w1", "H5w7", "H6w1", "H6w7" ), ordered = TRUE) str(temp) melt.out <- melt(temp,id.vars = c("group","calc_type", "Time"), measure.vars = "eff_no") ## plot cols <- c("Clr"="#66c2a5", "Rem"="#5e4fa2", "L1w1"="#66c2a5", "L1w7"="#66c2a5", "L2w1"="#66c2a5", "L2w7"="#66c2a5", "L3w1"="#66c2a5", "L3w7"="#66c2a5", "L4w1"="#66c2a5", "L4w7"="#66c2a5", "L5w1"="#66c2a5", "L5w7"="#66c2a5", "L6w1"="#66c2a5", "L6w7"="#66c2a5", "H1w1"="#5e4fa2", "H1w7"="#5e4fa2", "H2w1"="#5e4fa2", "H2w7"="#5e4fa2", "H3w1"="#5e4fa2", "H3w7"="#5e4fa2", "H4w1"="#5e4fa2", "H4w7"="#5e4fa2", "H5w1"="#5e4fa2", "H5w7"="#5e4fa2", "H6w1"="#5e4fa2", "H6w7"="#5e4fa2", "Low" = "#66c2a5", "High" = "#5e4fa2") shapes <- c("Clr"=15, "Rem"=15, "L1w1"=16, "L1w7"=17, "L2w1"=16, "L2w7"=17, "L3w1"=16, "L3w7"=17, "L4w1"=16, "L4w7"=17, "L5w1"=16, "L5w7"=17, "L6w1"=16, "L6w7"=17, "H1w1"=16, "H1w7"=17, "H2w1"=16, "H2w7"=17, "H3w1"=16, "H3w7"=17, "H4w1"=16, "H4w7"=17, "H5w1"=16, "H5w7"=17, "H6w1"=16, "H6w7"=17 ) # test plot p <- ggplot(data=melt.out, aes(x=group, value)) + #ggtitle("b") + geom_violin(data = melt.out[ which(melt.out$calc_type == "bootstrap"), ], aes(color = group) ) + scale_colour_manual(values = cols) + #geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], color="gray20", shape=1 ) + geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], aes(shape = group) , color="grey40", size=1 ) + scale_shape_manual(values = shapes) + #theme_bw() + theme_classic() + #theme(axis.text.x = element_text(angle=90, vjust=0.5) ) + labs(x = NULL, y = "Effective OTUs (count)") + theme(legend.position="none") + #facet_wrap(~Time, scales="free_x") facet_grid(~Time, scales="free_x", space="free_x") p levels(melt.out$group) # [1] "Clr" "Rem" "L1w1" "L1w7" "L2w1" "L2w7" "L3w1" "L3w7" "L4w1" "L4w7" "L5w1" "L5w7" "L6w1" "L6w7" "H1w1" "H1w7" "H2w1" # [18] "H2w7" "H3w1" "H3w7" "H4w1" "H4w7" "H5w1" "H5w7" "H6w1" "H6w7" substring(levels(melt.out$group),1,2) # [1] "Cl" "Re" "L1" "L1" "L2" "L2" "L3" "L3" "L4" "L4" "L5" "L5" "L6" "L6" "H1" "H1" "H2" "H2" "H3" "H3" "H4" "H4" "H5" "H5" # [25] "H6" "H6" ## Indicate significant differences...? sel <- which(melt.out$calc_type == "rarefyx1") rare1 <- melt.out[sel, ] rare1$group2 <- substring(rare1$group, 1,1 ) str(rare1) rare1$group2 <- factor(rare1$group2, levels = c("C", "H", "L", "R"), labels = c("Clr", "High", "Low", "Rem")) ## remove outliers before statistical test #bp <- boxplot(rare1$value ~ rare1$group2 + rare1$Time) bp <- boxplot(value ~ group2 + Time , data = rare1) sel <- which(rare1$value %in% bp$out) rare1$value[sel] # 308.15276 108.65263 25.05333 rare1[sel,] # group calc_type Time variable value group2 # 33 H2w1 rarefyx1 Week 1 eff_no 308.15276 High # 35 Rem rarefyx1 Source\nsoil eff_no 108.65263 Rem # 50 Clr rarefyx1 Source\nsoil eff_no 25.05333 Clr ## exclude outliers from Kruskal-Wallis significance testing rare1_exout <- rare1[-sel, ] ## Mann-Whitney-Wilcoxon Test wilcox.test(value ~ group2, data=rare1_exout[which(rare1_exout$group2 %in% c("Clr","Rem")), ]) # Wilcoxon rank sum test # data: value by group2 # W = 6, p-value = 0.004662 # alternative hypothesis: true location shift is not equal to 0 table( rare1_exout[which(rare1_exout$group2 %in% c("Clr","Rem")), ]$group2) # Clr High Low Rem # 8 0 0 8 wilcox.test(value ~ group2, data=rare1_exout[which(rare1_exout$group2 %in% c("High","Low") & rare1_exout$Time=="Week 1"), ]) # Wilcoxon rank sum test # data: value by group2 # W = 120, p-value = 3.093e-06 # alternative hypothesis: true location shift is not equal to 0 table( rare1_exout[which(rare1_exout$group2 %in% c("High","Low") & rare1_exout$Time=="Week 1"), ]$group2) # Clr High Low Rem # 0 10 12 0 wilcox.test(value ~ group2, data=rare1_exout[which(rare1_exout$group2 %in% c("High","Low") & rare1_exout$Time=="Week 7"), ]) # Wilcoxon rank sum test # data: value by group2 # W = 144, p-value = 7.396e-07 # alternative hypothesis: true location shift is not equal to 0 table( rare1_exout[which(rare1_exout$group2 %in% c("High","Low") & rare1_exout$Time=="Week 7"), ]$group2) # Clr High Low Rem # 0 12 12 0 hist(melt.out$value) summary(melt.out$value) # create arrow annotation, "Low" = "#f46d43", "High" = "#5e4fa2" anno.arrow <- data.frame(x_start = c(1.5, 1, 7 , 1 ), x_end = c(3, 6, 12 , 6 ), y_start = c(260, 350, 210 , 350 ), y_end = c(250, 350, 180, 355 ), arrow_colors = c("Low", "High", "Low", "High"), # add these to cols vector: "Low" = "#f46d43", "High" = "#5e4fa2" Time = c("Source\nsoil", "Week 1","Week 1", "Week 7")) # also add significance annotation annotation_df <- data.frame( Time = c("Source\nsoil", "Week 1", "Week 7"), start=c("Clr","L1w1","L1w7"), end = c("Rem","H6w1","H6w7"), y = c(408, 408, 408), label = c("**","***","***"), stringsAsFactors = FALSE) str(annotation_df) # ensure no conflict in factor levels annotation_df$start <- factor(annotation_df$start, levels = c("Clr","Rem", "L1w1", "L1w7", "L2w1", "L2w7", "L3w1", "L3w7", "L4w1", "L4w7", "L5w1", "L5w7", "L6w1", "L6w7", "H1w1", "H1w7", "H2w1", "H2w7", "H3w1", "H3w7", "H4w1", "H4w7", "H5w1", "H5w7", "H6w1", "H6w7" ), ordered = TRUE) annotation_df$end <- factor(annotation_df$end, levels = c("Clr","Rem", "L1w1", "L1w7", "L2w1", "L2w7", "L3w1", "L3w7", "L4w1", "L4w7", "L5w1", "L5w7", "L6w1", "L6w7", "H1w1", "H1w7", "H2w1", "H2w7", "H3w1", "H3w7", "H4w1", "H4w7", "H5w1", "H5w7", "H6w1", "H6w7" ), ordered = TRUE) pp <- #p + ggplot(data=melt.out, aes(x=group, value)) + #ggtitle("b") + geom_violin(data = melt.out[ which(melt.out$calc_type == "bootstrap"), ], aes(color = group) ) + scale_colour_manual(values = cols) + geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], aes(shape = group) , color="grey40", size=1 ) + scale_shape_manual(values = shapes) + theme_classic() + labs(x = NULL, y = "Effective OTUs (count)") + theme(legend.position="none") + facet_grid(~Time, scales="free_x", space="free_x") + scale_x_discrete(labels=c( "Clr", "Rem", "L1w1"="L1", "L1w7"="L1", "L2w1"="L2", "L2w7"="L2", "L3w1"="L3", "L3w7"="L3", "L4w1"="L4", "L4w7"="L4", "L5w1"="L5", "L5w7"="L5", "L6w1"="L6", "L6w7"="L6", "H1w1"="H1", "H1w7"="H1", "H2w1"="H2", "H2w7"="H2", "H3w1"="H3", "H3w7"="H3", "H4w1"="H4", "H4w7"="H4", "H5w1"="H5", "H5w7"="H5", "H6w1"="H6", "H6w7"="H6" )) + ## add arrows - based on this post: https://www.r-bloggers.com/adding-different-annotation-to-each-facet-in-ggplot/ geom_segment(data = anno.arrow, aes(x = x_start, y = y_start, xend = x_end, yend = y_end, colour = arrow_colors), #, colour = arrow_colors arrow = arrow(length = unit(5, "pt")) ) + geom_signif(data=annotation_df, aes(xmin=start, xmax=end, annotations=label, y_position=y), textsize = 3.5, size = 0.5, tip_length = 0, margin_top = 0, vjust = 0.4 , # vjust = -0.2 manual=TRUE) + # , size=0.5 ylim(0,415) + theme( #plot.title=element_text(face = "bold", hjust = -0.18), # adjust hjust by trial-and-error #plot.margin = margin(t = 2, r = 2, b = 2, l = 15, "pt"), #plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), plot.margin = margin(t = 0, r = 0, b = 0, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), # axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), angle=60, vjust=0.5, size = rel(0.6)), # axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.5)), # axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), angle=60, vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), #strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt")) #strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)) strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)) ) pp ## change panel widths ## https://stackoverflow.com/questions/31572239/set-space-in-facet-wrap-like-in-facet-grid # Get the ggplot grob gt <- ggplotGrob(pp) # Check for the widths - you need to change the two that are set to 1null gt$widths # [1] 5pt 0cm 1grobwidth 0.642896862460219cm 2.6null # [6] 5.5pt 12.2null 5.5pt 12.2null 0cm # [11] 0cm 0pt 2pt # Change panel widths gt$widths[5] # 2.6null gt$widths[7] # 12.2null gt$widths[9] # 12.2null # Replace the default widths with relative widths: gt$widths[5] <- unit(4.2, "null") # Draw the plot grid.newpage() grid.draw(gt) grid.text(label = "B" , x = unit(0.035, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Alpha-diversity-Source-ALL-soils-with-merged-sample-bootstrap-with-Sig-B-vFINAL2b-Edit-Panel-Widths.tiff"), width = 8.57, height = 5.95, units = "cm", res=600, compression="lzw") #------------------------ #### Ordination of air and fresh bedding (low biomass) samples #------------------------ unique(clean.16s@sam_data$samp_type) # "cecal" "fecal" "air" "soil" "fresh bedding" lowb.dc.16s <- prune_samples(clean.16s@sam_data$samp_type %in% c("air","fresh bedding"), clean.16s) lowb.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 78 samples ] # sample_data() Sample Data: [ 78 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(taxa_sums(lowb.dc.16s)) # 0 # prune taxa that have zero sequence reads lowb.dc.16s <- prune_taxa(taxa = taxa_sums(lowb.dc.16s) > 0, x = lowb.dc.16s) lowb.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2073 taxa and 78 samples ] # sample_data() Sample Data: [ 78 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2073 taxa by 7 taxonomic ranks ] min(taxa_sums(lowb.dc.16s)) # 2 min(sample_sums(lowb.dc.16s)) # 345 sort( sample_sums(lowb.dc.16s) ) # bfre1T5 bfre1T13 bfre3T5 L3a2w6 H6a2w6 C2a2w6 L3a1w6 L4a1w6 L5a1w6 bfre2T13 L2a1w6 L2a2w6 # 345 839 945 1307 1400 1681 1683 1719 1850 2179 2254 2289 # L5a2w6 H3a1w6 H4a2w6 H6a1w6 L1a1w6 bfre2T5 H4a1w6 C3a1w1 bfre3T13 L6a2w6 C2a1w6 H1a2w6 # 2722 2867 3024 3370 3374 3459 3577 3584 3646 3902 3917 4950 # H3a2w6 C6a1w6 C5a1w6 H1a1w6 C6a2w6 H4a2w1 L4a2w6 C4a1w6 L6a1w6 L1a2w6 C4a2w6 L5a2w1 # 5110 5419 6900 6997 7738 8409 8563 8798 9648 9919 10442 10463 # H2a2w1 C3a2w6 H4a1w1 H6a2w1 H5a1w1 C1a1w6 C1a2w6 H5a2w6 L6a2w1 H5a2w1 H2a1w1 C5a1w1 # 10712 11767 13535 14330 14831 15195 15677 16244 16250 16953 17243 17636 # H2a2w6 C2a1w1 L1a2w1 C6a1w1 H1a2w1 C4a2w1 L1a1w1 H1a1w1 C3a1w6 C4a1w1 C6a2w1 H3a2w1 # 18218 19387 21463 24075 24373 26899 27130 27239 29960 30520 30695 31116 # H5a1w6 L6a1w1 L4a2w1 L5a1w1 C1a2w1 C5a2w1 L2a2w1 L2a1w1 C2a2w1 C5a2w6 H2a1w6 H3a1w1 # 31405 31626 33465 33470 33612 34760 34876 35718 36100 36880 40548 46030 # L3a2w1 L3a1w1 H6a1w1 C1a1w1 L4a1w1 C3a2w1 # 55905 56460 61540 72623 81978 86863 ## use samples above sequence read threshold of 800 lowb.dc.16s <- prune_samples( samples = sample_sums(lowb.dc.16s) >= 800 , x = lowb.dc.16s ) lowb.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2073 taxa and 77 samples ] # sample_data() Sample Data: [ 77 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2073 taxa by 7 taxonomic ranks ] table( lowb.dc.16s@sam_data$samp_type ) # air fresh bedding # 72 5 table( lowb.dc.16s@sam_data$Treatment ) # Control High Low # 24 24 24 table( lowb.dc.16s@sam_data$Time ) # Week 1 Week 2 Week 6 # 36 2 39 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.lowb.dc.16s <- rarefy_even_depth(lowb.dc.16s, sample.size = min(sample_sums(lowb.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.lowb.dc.16s) # all 839 min(taxa_sums(rare.lowb.dc.16s)) # 1 ntaxa(rare.lowb.dc.16s) # 1743 nsamples(rare.lowb.dc.16s) # 77 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.lowb.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.1460756 # Stress type 1, weak ties # No convergent solutions - best solution after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.lowb.dc.16s@sam_data$samp_type rare.lowb.dc.16s@sam_data$Time length(unique(rare.lowb.dc.16s@sam_data$samp_type )) # 2 length(unique(rare.lowb.dc.16s@sam_data$Cage.Name )) # 13 p <- plot_ordination(rare.lowb.dc.16s, ord, type="samples", color="Treatment", shape="samp_type") p str(p) #p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) #p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) head(p$data) p$data$Treatment <- as.character(p$data$Treatment) sel <- which(is.na( p$data$Treatment)) p$data$Treatment[sel] <- "N/A" p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High","N/A"), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2", "N/A" = "#999999" ) # orange, aqua, dark-blue shapes <- c(1,2) pp <- p + theme_bw() + scale_colour_manual(values = cols, name="Treatment") + scale_shape_manual(values = shapes, name="Sample type") + #scale_fill_manual(values = cols) + #geom_polygon(aes(fill = Treatment), alpha = 0.3) + annotate(geom="text", x= -2.5, y= -1.5, label = paste0("Stress = ",round(ord$stress,5)), hjust=0, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 1), shape = guide_legend(order = 2) ) pp ## Minimise plot size & display week number sel <- which(p$data$samp_type == "fresh bedding") p$data$samp_type[sel] <- "fresh\nbedding" pp <- p + theme_bw() + #ggtitle("C") + geom_point() + geom_text(label = substr(p$data$Time,start=6,stop=6), size = 2 ,hjust = 0, nudge_x = 0.15) + #geom_point() + geom_text(label = substr(p$data$Time,start=6,stop=6), size = 1.8 ,hjust = 0, nudge_x = 0.15) + scale_colour_manual(values = cols, name="Treatment") + scale_shape_manual(values = shapes, name="Sample\ntype") + #annotate(geom="text", x= 2.4, y= -2.5, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=0, size= 2.5 ) + # size=rel(0.8) #annotate(geom="text", x= 2.5, y= -2.5, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=0, size= 2 ) + # size=rel(0.8) annotate(geom="text", x= 2.5, y= -2.5, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=0, size= 2.5 ) + # size=rel(0.8) guides( color = guide_legend(order = 1), shape = guide_legend(order = 2) ) + labs(x = NULL, y = NULL) + theme( axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), plot.title=element_text(face = "bold", hjust = 0), # adjust hjust by trial-and-error plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), # l = 10 #legend.key.size = unit(2, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), #legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.key.size = unit(2.5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), legend.box.spacing = unit(2, "pt"), legend.box.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.spacing = unit(4, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) pp grid.text(label = "C" , x = unit(0.075, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Air-FreshBedding-withWeekNo-1xRarefy-by-Treatment-and-Sample-Type-C-vFINAL.tiff"), width = 7, height = 5.95, units = "cm", res=600, compression="lzw") table(p$data$Treatment) # Control Low High N/A # 24 24 24 5 table(p$data$Time) # Week 1 Week 2 Week 6 # 36 2 39 # Test hypothesis that microbiota vary (with different centroids) by Treatment # exclude fresh bedding from analysis sel <- which( is.na(rare.lowb.dc.16s@sam_data$Treatment) ) # qty 5 keep_samples <- sample_names(rare.lowb.dc.16s)[-sel] rare.lowb.dc.16s.exclbfre <- prune_samples(samples = keep_samples, x = rare.lowb.dc.16s) min( sample_sums(rare.lowb.dc.16s.exclbfre) ) # 839 min( taxa_sums(rare.lowb.dc.16s.exclbfre) ) # 0 # prune taxa that have zero sequence reads rare.lowb.dc.16s.exclbfre <- prune_taxa(taxa = taxa_sums(rare.lowb.dc.16s.exclbfre) > 0, x = rare.lowb.dc.16s.exclbfre) rare.lowb.dc.16s.exclbfre # phyloseq-class experiment-level object # otu_table() OTU Table: [ 1735 taxa and 72 samples ] # sample_data() Sample Data: [ 72 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 1735 taxa by 7 taxonomic ranks ] # Calculate bray curtis distance matrix set.seed(123) bray.rare.lowb.dc.16s.exclbfre <- phyloseq::distance(rare.lowb.dc.16s.exclbfre, method = "bray") sampledf <- data.frame(sample_data(rare.lowb.dc.16s.exclbfre)) str(sampledf) # # Adonis test # set.seed(123) # adonis(bray.rare.lowb.dc.16s.exclbfre ~ Treatment, data = sampledf) # # Call: # # adonis(formula = bray.rare.lowb.dc.16s.exclbfre ~ Treatment, data = sampledf) # # # # Permutation: free # # Number of permutations: 999 # # # # Terms added sequentially (first to last) # # # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # # Treatment 2 2.8957 1.44787 6.6488 0.16158 0.001 *** # # Residuals 69 15.0258 0.21776 0.83842 # # Total 71 17.9215 1.00000 # # --- # # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Adonis test set.seed(123) adonis(bray.rare.lowb.dc.16s.exclbfre ~ Treatment + Time , data = sampledf) # Call: # adonis(formula = bray.rare.lowb.dc.16s.exclbfre ~ Treatment + Time, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Treatment 2 2.8957 1.44787 7.2118 0.16158 0.001 *** # Time 1 1.3737 1.37374 6.8425 0.07665 0.001 *** # Residuals 68 13.6520 0.20077 0.76177 # Total 71 17.9215 1.00000 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Homogeneity of dispersion test beta <- betadisper(bray.rare.lowb.dc.16s.exclbfre, sampledf$Treatment) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 2 0.75066 0.37533 47.496 999 0.001 *** # Residuals 69 0.54526 0.00790 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # #------------------------ #### Alpha diversity of air samples- including merged sample bootstrap resampling #------------------------ ## the function below uses merged-sample bootstrap resampling to estimate the ## overall site-level alpha diversity. ## merging by group for triplicates (source soils) and duplicates (treatment soils in trays) ## create function to rarefy for initial sample > merge samples by type > rarefy again > calculate shannon's index # # # # # # # # # # # # calc_AlphaDiv_in_parallel_any_merge_no <- function(phy_obj, merge_by ) { # rarefy seed <- 123+j r16s <- rarefy_even_depth(phy_obj, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) # merge samples merged.r16s <- merge_samples(r16s, group= eval(parse(text= paste0("r16s@sam_data$",merge_by))) ) # rarefy back to min sample size for this bootstrap sample - i.e. rarefied-merged-rarefied seed <- 234+j rmr16s <- rarefy_even_depth(merged.r16s, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = TRUE, trimOTUs = TRUE, verbose = TRUE) # - - - - - - - - - - ### ALPHA DIVERSITY ## calculate Shannon's index shan.rmr16s <- plot_richness(rmr16s, measures=c("Shannon")) # export data out <- data.frame(sample=shan.rmr16s$data$samples,shannon=shan.rmr16s$data$value, group=shan.rmr16s$data$samples) out$eff_no <- exp(out$shannon) out$calc_type <- "bootstrap" return(out) } # # # # # # # # # # # # ## apply this to previously decontaminated dataset: lowb.dc.16s air.dc.16s <- prune_samples(lowb.dc.16s@sam_data$samp_type == "air", lowb.dc.16s) air.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2073 taxa and 72 samples ] # sample_data() Sample Data: [ 72 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2073 taxa by 7 taxonomic ranks ] min(taxa_sums(air.dc.16s)) # 1 min(sample_sums(air.dc.16s)) # 1307 sort( sample_sums(air.dc.16s) ) # L3a2w6 H6a2w6 C2a2w6 L3a1w6 L4a1w6 L5a1w6 L2a1w6 L2a2w6 L5a2w6 H3a1w6 H4a2w6 H6a1w6 L1a1w6 H4a1w6 C3a1w1 L6a2w6 # 1307 1400 1681 1683 1719 1850 2254 2289 2722 2867 3024 3370 3374 3577 3584 3902 # C2a1w6 H1a2w6 H3a2w6 C6a1w6 C5a1w6 H1a1w6 C6a2w6 H4a2w1 L4a2w6 C4a1w6 L6a1w6 L1a2w6 C4a2w6 L5a2w1 H2a2w1 C3a2w6 # 3917 4950 5110 5419 6900 6997 7738 8409 8563 8798 9648 9919 10442 10463 10712 11767 # H4a1w1 H6a2w1 H5a1w1 C1a1w6 C1a2w6 H5a2w6 L6a2w1 H5a2w1 H2a1w1 C5a1w1 H2a2w6 C2a1w1 L1a2w1 C6a1w1 H1a2w1 C4a2w1 # 13535 14330 14831 15195 15677 16244 16250 16953 17243 17636 18218 19387 21463 24075 24373 26899 # L1a1w1 H1a1w1 C3a1w6 C4a1w1 C6a2w1 H3a2w1 H5a1w6 L6a1w1 L4a2w1 L5a1w1 C1a2w1 C5a2w1 L2a2w1 L2a1w1 C2a2w1 C5a2w6 # 27130 27239 29960 30520 30695 31116 31405 31626 33465 33470 33612 34760 34876 35718 36100 36880 # H2a1w6 H3a1w1 L3a2w1 L3a1w1 H6a1w1 C1a1w1 L4a1w1 C3a2w1 # 40548 46030 55905 56460 61540 72623 81978 86863 table( air.dc.16s@sam_data$Time ) # Week 1 Week 6 # 36 36 table( air.dc.16s@sam_data$Treatment ) # Control High Low # 24 24 24 names( air.dc.16s@sam_data ) # [1] "mouseID" "sample" "samp_type" # [4] "Treatment" "Time" "Animal.no" # [7] "Sex" "Litter" "Date.of.Birth" # [10] "Cage.no" "Cage.Name" "Rack" # [13] "Ear.notch.ID" "Ear.notch.pos" "Age_received_20_Aug" # [16] "Age_first_soil_exposure_27_Aug" "Rack_position" "Sample_or_Control" # [19] "is.neg" "quant_reading" air.dc.16s@sam_data[ ,c("sample","samp_type","Treatment","Time")] phy_obj <- air.dc.16s phy_obj@sam_data[ ,c("sample","samp_type","Treatment","Time")] # create grouping variable for each site phy_obj@sam_data$group <- NA phy_obj@sam_data$group <- paste0( substring(phy_obj@sam_data$sample,1,2), substring(phy_obj@sam_data$sample,nchar(phy_obj@sam_data$sample)-1,nchar(phy_obj@sam_data$sample))) head( phy_obj@sam_data[ ,c("sample","samp_type","Treatment","Time","group")] ) # Sample Data: [6 samples by 5 sample variables]: # sample samp_type Treatment Time group # C5a1w1 C5a1w1 air Control Week 1 C5w1 # L1a2w1 L1a2w1 air Low Week 1 L1w1 # C3a1w6 C3a1w6 air Control Week 6 C3w6 # H1a2w1 H1a2w1 air High Week 1 H1w1 # C3a2w6 C3a2w6 air Control Week 6 C3w6 # C5a2w1 C5a2w1 air Control Week 1 C5w1 length(unique(phy_obj@sam_data$group)) # 36 sort(unique(phy_obj@sam_data$group)) # [1] "C1w1" "C1w6" "C2w1" "C2w6" "C3w1" "C3w6" "C4w1" "C4w6" "C5w1" "C5w6" "C6w1" "C6w6" # [13] "H1w1" "H1w6" "H2w1" "H2w6" "H3w1" "H3w6" "H4w1" "H4w6" "H5w1" "H5w6" "H6w1" "H6w6" # [25] "L1w1" "L1w6" "L2w1" "L2w6" "L3w1" "L3w6" "L4w1" "L4w6" "L5w1" "L5w6" "L6w1" "L6w6" # bootstrap outputs will automatically be stored in a list: b_out # number of bootstrap resamples B <- 100 # iterate in parallel cl<-makeCluster( detectCores()-1 ) # detectCores()-1 registerDoParallel(cl) # remember to stopcluster() b_out <- foreach(j=1:B, .packages=c('phyloseq')) %dopar% calc_AlphaDiv_in_parallel_any_merge_no(phy_obj=phy_obj, merge_by="group" ) stopCluster(cl) getwd() # "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling" saveRDS(b_out, file = "b_out__alpha_diversity_AIR.RDS") length(b_out) # 100 names(b_out[[1]]) # "sample" "shannon" "group" "eff_no" "calc_type" dim(b_out[[1]]) # 36 5 b_out[[1]] # sample shannon group eff_no calc_type # 1 C1w1 4.246433 C1w1 69.85580 bootstrap # 2 C1w6 3.670383 C1w6 39.26693 bootstrap # 3 C2w1 3.746987 C2w1 42.39316 bootstrap # 4 C2w6 3.481686 C2w6 32.51448 bootstrap # 5 C3w1 3.458858 C3w1 31.78065 bootstrap # 6 C3w6 3.529948 C3w6 34.12219 bootstrap # 7 C4w1 3.761194 C4w1 42.99975 bootstrap # 8 C4w6 3.903370 C4w6 49.56919 bootstrap # 9 C5w1 3.690483 C5w1 40.06419 bootstrap # 10 C5w6 3.846638 C5w6 46.83533 bootstrap # 11 C6w1 3.672844 C6w1 39.36371 bootstrap # 12 C6w6 3.841666 C6w6 46.60305 bootstrap # 13 H1w1 5.494804 H1w1 243.42381 bootstrap # 14 H1w6 3.593221 H1w6 36.35097 bootstrap # 15 H2w1 5.402024 H2w1 221.85507 bootstrap # 16 H2w6 3.657781 H2w6 38.77521 bootstrap # 17 H3w1 5.377068 H3w1 216.38684 bootstrap # 18 H3w6 4.046765 H3w6 57.21209 bootstrap # 19 H4w1 5.435367 H4w1 229.37712 bootstrap # 20 H4w6 3.928525 H4w6 50.83196 bootstrap # 21 H5w1 4.995722 H5w1 147.77965 bootstrap # 22 H5w6 3.696729 H5w6 40.31524 bootstrap # 23 H6w1 5.069572 H6w1 159.10628 bootstrap # 24 H6w6 4.568162 H6w6 96.36681 bootstrap # 25 L1w1 5.304356 L1w1 201.21142 bootstrap # 26 L1w6 3.773489 L1w6 43.53168 bootstrap # 27 L2w1 5.325298 L2w1 205.46955 bootstrap # 28 L2w6 3.776049 L2w6 43.64328 bootstrap # 29 L3w1 5.481867 L3w1 240.29487 bootstrap # 30 L3w6 3.987824 L3w6 53.93742 bootstrap # 31 L4w1 4.647823 L4w1 104.35756 bootstrap # 32 L4w6 3.846751 L4w6 46.84065 bootstrap # 33 L5w1 5.195952 L5w1 180.53999 bootstrap # 34 L5w6 3.507482 L5w6 33.36415 bootstrap # 35 L6w1 5.241539 L6w1 188.96064 bootstrap # 36 L6w6 4.300082 L6w6 73.70586 bootstrap ### 1st calculate diversity from one rarefying step ### then append bootstrap-derived uncertainty... # rarefy #1 seed <- 123 r1.16s <- rarefy_even_depth(phy_obj, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) shan.r1.16s <- plot_richness(r1.16s, measures=c("Shannon")) shan.r1.16s out <- data.frame(sample=shan.r1.16s$data$samples,shannon=shan.r1.16s$data$value, group=NA ) # out$eff_no <- exp(out$shannon) # calculate effective no of species out$calc_type <- "rarefyx1" str(out) ## fix "group" variable based on "sample" out$sample <- as.character(out$sample) out$group <- paste0( substring(out$sample,1,2), substring(out$sample,nchar(out$sample)-1,nchar(out$sample))) str(out) # 'data.frame': 72 obs. of 5 variables: # $ sample : chr "C5a1w1" "L1a2w1" "C3a1w6" "H1a2w1" ... # $ shannon : num 3.62 4.98 3.44 5.22 3.52 ... # $ group : chr "C5w1" "L1w1" "C3w6" "H1w1" ... # $ eff_no : num 37.5 145.7 31.1 185.4 33.7 ... # $ calc_type: chr "rarefyx1" "rarefyx1" "rarefyx1" "rarefyx1" ... names(out) # "sample" "shannon" "group" "eff_no" "calc_type" names(b_out[[1]]) # "sample" "shannon" "group" "eff_no" "calc_type" dim(b_out[[1]]) # 36 5 temp <- out for (j in 1:B) { temp <- rbind(temp,b_out[[j]]) } head(temp) temp[1:50, ] tail(temp) names(temp) # "sample" "shannon" "group" "eff_no" "calc_type" levels(factor(temp$group)) # [1] "C1w1" "C1w6" "C2w1" "C2w6" "C3w1" "C3w6" "C4w1" "C4w6" "C5w1" "C5w6" "C6w1" "C6w6" # [13] "H1w1" "H1w6" "H2w1" "H2w6" "H3w1" "H3w6" "H4w1" "H4w6" "H5w1" "H5w6" "H6w1" "H6w6" # [25] "L1w1" "L1w6" "L2w1" "L2w6" "L3w1" "L3w6" "L4w1" "L4w6" "L5w1" "L5w6" "L6w1" "L6w6" temp$group <- factor(temp$group, levels = c( "C1w1", "C1w6", "C2w1", "C2w6", "C3w1", "C3w6", "C4w1", "C4w6", "C5w1", "C5w6", "C6w1", "C6w6", "L1w1", "L1w6", "L2w1", "L2w6", "L3w1", "L3w6", "L4w1", "L4w6", "L5w1", "L5w6", "L6w1", "L6w6", "H1w1", "H1w6", "H2w1", "H2w6", "H3w1", "H3w6", "H4w1", "H4w6", "H5w1", "H5w6", "H6w1", "H6w6" ), ordered = TRUE) temp$Time <- substring(temp$group, first = nchar(as.character(temp$group))-1, last = nchar(as.character(temp$group))) temp$Time <- factor(temp$Time, levels = c("w1", "w6"), labels = c("Week 1", "Week 6")) str(temp) # 'data.frame': 3672 obs. of 6 variables: # $ sample : chr "C5a1w1" "L1a2w1" "C3a1w6" "H1a2w1" ... # $ shannon : num 3.62 4.98 3.44 5.22 3.52 ... # $ group : Ord.factor w/ 36 levels "C1w1"<"C1w6"<..: 9 13 6 25 6 9 13 25 24 4 ... # $ eff_no : num 37.5 145.7 31.1 185.4 33.7 ... # $ calc_type: chr "rarefyx1" "rarefyx1" "rarefyx1" "rarefyx1" ... # $ Time : Factor w/ 2 levels "Week 1","Week 6": 1 1 2 1 2 1 1 1 2 2 ... temp.air.alpha <- temp melt.out <- melt(temp,id.vars = c("group","calc_type", "Time"), measure.vars = "eff_no") ## plot #cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2", "C1w1"="#f46d43", "C1w6"="#f46d43", "C2w1"="#f46d43", "C2w6"="#f46d43", "C3w1"="#f46d43", "C3w6"="#f46d43", "C4w1"="#f46d43", "C4w6"="#f46d43", "C5w1"="#f46d43", "C5w6"="#f46d43", "C6w1"="#f46d43", "C6w6"="#f46d43", "L1w1"="#66c2a5", "L1w6"="#66c2a5", "L2w1"="#66c2a5", "L2w6"="#66c2a5", "L3w1"="#66c2a5", "L3w6"="#66c2a5", "L4w1"="#66c2a5", "L4w6"="#66c2a5", "L5w1"="#66c2a5", "L5w6"="#66c2a5", "L6w1"="#66c2a5", "L6w6"="#66c2a5", "H1w1"="#5e4fa2", "H1w6"="#5e4fa2", "H2w1"="#5e4fa2", "H2w6"="#5e4fa2", "H3w1"="#5e4fa2", "H3w6"="#5e4fa2", "H4w1"="#5e4fa2", "H4w6"="#5e4fa2", "H5w1"="#5e4fa2", "H5w6"="#5e4fa2", "H6w1"="#5e4fa2", "H6w6"="#5e4fa2" ) shapes <- c( "C1w1"=16, "C1w6"=17, "C2w1"=16, "C2w6"=17, "C3w1"=16, "C3w6"=17, "C4w1"=16, "C4w6"=17, "C5w1"=16, "C5w6"=17, "C6w1"=16, "C6w6"=17, "L1w1"=16, "L1w6"=17, "L2w1"=16, "L2w6"=17, "L3w1"=16, "L3w6"=17, "L4w1"=16, "L4w6"=17, "L5w1"=16, "L5w6"=17, "L6w1"=16, "L6w6"=17, "H1w1"=16, "H1w6"=17, "H2w1"=16, "H2w6"=17, "H3w1"=16, "H3w6"=17, "H4w1"=16, "H4w6"=17, "H5w1"=16, "H5w6"=17, "H6w1"=16, "H6w6"=17 ) p <- ggplot(data=melt.out, aes(x=group, value)) + #ggtitle("d") + geom_violin(data = melt.out[ which(melt.out$calc_type == "bootstrap"), ], aes(color = group) ) + scale_colour_manual(values = cols) + #geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], color="gray20", shape=1 ) + geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], aes(shape = group) , color="grey40", size=1 ) + scale_shape_manual(values = shapes) + #theme_bw() + theme_classic() + #theme(axis.text.x = element_text(angle=90, vjust=0.5) ) + labs(x = NULL, y = "Effective OTUs (count)") + theme(legend.position="none") + #facet_grid(~Time) facet_grid(~Time, scales="free_x", space="free_x") p ## Indicate significant differences...? sel <- which(melt.out$calc_type == "rarefyx1") rare1 <- melt.out[sel, ] rare1$group2 <- substring(rare1$group, 1,1 ) str(rare1) levels(factor(rare1$group2)) rare1$group2 <- factor(rare1$group2, levels = c("C", "L", "H"), labels = c("Control", "Low", "High")) bp <- boxplot(rare1$value ~ rare1$group2 + rare1$Time ) ## exclude outliers from Kruskal-Wallis significance testing sel <- which(rare1$value %in% bp$out) rare1$value[sel] rare1[sel, ] # group calc_type Time variable value group2 # 12 H6w6 rarefyx1 Week 6 eff_no 66.52506 High # 55 C1w1 rarefyx1 Week 1 eff_no 57.26172 Control # 59 C1w1 rarefyx1 Week 1 eff_no 71.00205 Control rare1_exout <- rare1[-sel, ] # Kruskal-Wallis test kt <- kruskal.test( value ~ group2, rare1_exout[which(rare1_exout$Time=="Week 1"), ]) kt # Kruskal-Wallis rank sum test # data: value by group2 # Kruskal-Wallis chi-squared = 20.632, df = 2, p-value = 3.31e-05 table( rare1_exout[which(rare1_exout$Time=="Week 1"), ]$group2 ) # Control Low High # 10 12 12 ## Dunn Test uses factor vector or non-numeric vector that can be coerced to a factor vector pt <- dunnTest( value ~ factor(as.character(group2)), data= rare1_exout[which(rare1_exout$Time=="Week 1"), ], method = "bonferroni" ) pt # Dunn (1964) Kruskal-Wallis multiple comparison # p-values adjusted with the Bonferroni method. # # Comparison Z P.unadj P.adj # 1 Control - High -4.104256 4.056176e-05 0.0001216853 # 2 Control - Low -3.869727 1.089571e-04 0.0003268714 # 3 High - Low 0.245976 8.057008e-01 1.0000000000 pt$dtres pt <- pt$res pt cldList(comparison = pt$Comparison, p.value = pt$P.adj, threshold = 0.05) # Group Letter MonoLetter # 1 Control a a # 2 High b b # 3 Low b b # Kruskal-Wallis test kt <- kruskal.test( value ~ group2, rare1_exout[which(rare1_exout$Time=="Week 6"), ]) kt # Kruskal-Wallis rank sum test # data: value by group2 # Kruskal-Wallis chi-squared = 0.48651, df = 2, p-value = 0.7841 table( rare1_exout[which(rare1_exout$Time=="Week 6"), ]$group2 ) # Control Low High # 12 12 11 # create arrow annotation, "Low" = "#f46d43", "High" = "#5e4fa2" anno <- data.frame(x_start = c(10, 0, 5 ), x_end = c(15, 5, 10 ), y_start = c(40, 115, 145 ), y_end = c(40, 80, 100 ), arrow_colors = c("Control", "Low", "High"), # add these to cols vector: "Low" = "#f46d43", "High" = "#5e4fa2" Time = c("Week 1","Week 6", "Week 6")) # also add significance annotation annotation_df <- data.frame( Time = c("Week 1"), start=c("C1w1"), end = c("H6w1"), y = c(280), label = c("***"), stringsAsFactors = FALSE) str(annotation_df) # ensure no conflict in factor levels annotation_df$start <- factor(annotation_df$start, levels = c( "C1w1", "C1w6", "C2w1", "C2w6", "C3w1", "C3w6", "C4w1", "C4w6", "C5w1", "C5w6", "C6w1", "C6w6", "L1w1", "L1w6", "L2w1", "L2w6", "L3w1", "L3w6", "L4w1", "L4w6", "L5w1", "L5w6", "L6w1", "L6w6", "H1w1", "H1w6", "H2w1", "H2w6", "H3w1", "H3w6", "H4w1", "H4w6", "H5w1", "H5w6", "H6w1", "H6w6" ), ordered = TRUE) annotation_df$end <- factor(annotation_df$end, levels = c( "C1w1", "C1w6", "C2w1", "C2w6", "C3w1", "C3w6", "C4w1", "C4w6", "C5w1", "C5w6", "C6w1", "C6w6", "L1w1", "L1w6", "L2w1", "L2w6", "L3w1", "L3w6", "L4w1", "L4w6", "L5w1", "L5w6", "L6w1", "L6w6", "H1w1", "H1w6", "H2w1", "H2w6", "H3w1", "H3w6", "H4w1", "H4w6", "H5w1", "H5w6", "H6w1", "H6w6" ), ordered = TRUE) hist(melt.out$value) pp <- #p + ggplot(data=melt.out, aes(x=group, value)) + #ggtitle("d") + geom_violin(data = melt.out[ which(melt.out$calc_type == "bootstrap"), ], aes(color = group) ) + scale_colour_manual(values = cols) + geom_point(data = melt.out[ which(melt.out$calc_type == "rarefyx1"), ], aes(shape = group) , color="grey40", size=1 ) + scale_shape_manual(values = shapes) + theme_classic() + labs(x = NULL, y = "Effective OTUs (count)") + theme(legend.position="none") + facet_grid(~Time, scales="free_x", space="free_x") + scale_x_discrete(labels=c( "C1w1"="C1", "C1w6"="C1", "C2w1"="C2", "C2w6"="C2", "C3w1"="C3", "C3w6"="C3", "C4w1"="C4", "C4w6"="C4", "C5w1"="C5", "C5w6"="C5", "C6w1"="C6", "C6w6"="C6", "L1w1"="L1", "L1w6"="L1", "L2w1"="L2", "L2w6"="L2", "L3w1"="L3", "L3w6"="L3", "L4w1"="L4", "L4w6"="L4", "L5w1"="L5", "L5w6"="L5", "L6w1"="L6", "L6w6"="L6", "H1w1"="H1", "H1w6"="H1", "H2w1"="H2", "H2w6"="H2", "H3w1"="H3", "H3w6"="H3", "H4w1"="H4", "H4w6"="H4", "H5w1"="H5", "H5w6"="H5", "H6w1"="H6", "H6w6"="H6" )) + # add arrows - based on this post: https://www.r-bloggers.com/adding-different-annotation-to-each-facet-in-ggplot/ geom_segment(data = anno, aes(x = x_start, y = y_start, xend = x_end, yend = y_end, colour = arrow_colors), #, colour = arrow_colors arrow = arrow(length = unit(5, "pt")) ) + geom_signif(data=annotation_df, aes(xmin=start, xmax=end, annotations=label, y_position=y), textsize = 3.5, size = 0.5, tip_length = 0, margin_top = 0, vjust = 0.4 , # vjust = -0.2 manual=TRUE) + ylim(0,290) + theme( #plot.title=element_text(face = "bold", hjust = -0.18), # adjust hjust by trial-and-error #plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), plot.margin = margin(t = 5, r = 0, b = 0, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), # axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), angle=60, vjust=0.5, size = rel(0.6)), # axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.5)), # axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), angle=60, vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), #strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)) strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)) ) pp grid.text(label = "D" , x = unit(0.035, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Alpha-diversity-AIR-samples-with-merged-sample-bootstrap-with-Sig-D-vFINAL.tiff"), width = 8.57, height = 5.95, units = "cm", res=600, compression="lzw") #------------------------ #### Ordination of fecal Week 0 only samples #------------------------ unique(clean.16s@sam_data$samp_type) # "cecal" "fecal" "air" "soil" "fresh bedding" fecalw0.dc.16s <- prune_samples( clean.16s@sam_data$samp_type == "fecal" & clean.16s@sam_data$Time == "Week 0", clean.16s ) fecalw0.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 54 samples ] # sample_data() Sample Data: [ 54 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(fecalw0.dc.16s)) # 11450 min(taxa_sums(fecalw0.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw0.dc.16s <- prune_taxa(taxa = taxa_sums(fecalw0.dc.16s) > 0, x = fecalw0.dc.16s) fecalw0.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 732 taxa and 54 samples ] # sample_data() Sample Data: [ 54 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 732 taxa by 7 taxonomic ranks ] min(taxa_sums(fecalw0.dc.16s)) # 1 table(fecalw0.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.fecalw0.dc.16s <- rarefy_even_depth(fecalw0.dc.16s, sample.size = min(sample_sums(fecalw0.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.fecalw0.dc.16s) # all 11450 ntaxa(rare.fecalw0.dc.16s) # 610 nsamples(rare.fecalw0.dc.16s) # 54 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.fecalw0.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.1573245 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.fecalw0.dc.16s@sam_data$samp_type # all "fecal" rare.fecalw0.dc.16s@sam_data$Time # all "Week 1" length(unique(rare.fecalw0.dc.16s@sam_data$Litter )) # 15 length(unique(rare.fecalw0.dc.16s@sam_data$Cage.Name )) # 18 ## Display by Litter p <- plot_ordination(rare.fecalw0.dc.16s, ord, type="samples", color="Litter", shape = "Litter") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) unique( p$data$Litter ) # http://sape.inf.usi.ch/quick-reference/ggplot2/colour cols <- c( "blue" , "dodgerblue" , "navy" , "chartreuse3" , "springgreen3" , "olivedrab4", "seagreen4", "green" , "orange", "tomato", "magenta3", "hotpink", "purple", "red" , "firebrick" ) shapes <- c(1:15) pp <- p + theme_bw() + #ggtitle("A") + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes) + geom_polygon(aes(fill = Litter), alpha = 0) + annotate(geom="text", x= -3.5, y= -0.6, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + #guides(col = guide_legend(ncol = 2) ) + labs(x = NULL, y = NULL) + theme( plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(2, "pt"), #legend.spacing = unit(20, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), legend.box.spacing = unit(2, "pt") ) pp grid.text(label = "A" , x = unit(0.055, "npc") , y = unit(0.94,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week0-only-1xRarefy-by-Litters-vFINAL.tiff"), width = 8.3, height = 7, units = "cm", res=600, compression="lzw") ## Display by Litter and Cage distribution p <- plot_ordination(rare.fecalw0.dc.16s, ord, type="samples", color="Litter", shape="Cage.Name") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) # http://sape.inf.usi.ch/quick-reference/ggplot2/colour cols <- c( "blue" , "dodgerblue" , "navy" , "chartreuse3" , "springgreen3" , "olivedrab4", "seagreen4", "green" , "orange", "tomato", "magenta3", "hotpink", "purple", "red" , "firebrick" ) shapes <- c(1:18) pp <- p + theme_bw() + #ggtitle("B") + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + guides(col = guide_legend(ncol = 2), shape = guide_legend(ncol = 3)) + annotate(geom="text", x= -3.5, y= -0.6, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + labs(x = NULL, y = NULL) + theme( plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(2, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), legend.box.spacing = unit(2, "pt") ) pp grid.text(label = "B" , x = unit(0.055, "npc") , y = unit(0.94,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week0-only-1xRarefy-by-Litters-and-Cage-vFINAL.tiff"), width = 9.6, height = 7, units = "cm", res=600, compression="lzw") ## Display by Treatment and Cage p <- plot_ordination(rare.fecalw0.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) pp <- p + theme_bw() + # scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + geom_polygon(aes(fill = Treatment), alpha = 0.3) + scale_fill_manual(values = cols) + guides( shape = guide_legend(ncol = 2) ) + annotate(geom="text", x= -3, y= -0.6, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) pp str(p$data) ## Minimise plot size pp <- p + theme_bw() + # #pp <- ggplot(data = p$data, mapping = aes(x = NMDS1, y = NMDS2) ) + geom_point(aes(color=Treatment, shape= Cage.Name), size = 1) + theme_bw() + # ggtitle("A") + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + geom_polygon(aes(fill = Treatment), alpha = 0.3) + scale_fill_manual(values = cols) + guides( shape = guide_legend(ncol = 2) ) + #guides( shape = guide_legend(ncol = 3) ) + #guides( col = guide_legend(ncol = 3) ) + annotate(geom="text", x= -3.5, y= -0.6, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + labs(x = NULL, y = NULL) + theme( plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), #legend.key.size = unit(2, "pt"), legend.key.size = unit(1.35, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.5)), #legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.4)), #legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.45)), legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.55)), #legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), #legend.box.spacing = unit(2, "pt"), legend.box.spacing = unit(4, "pt"), legend.spacing = unit(4, "pt") ) pp ## Plot legend ONLY library(cowplot); packageVersion("cowplot") # '0.9.2' legend <- cowplot::get_legend(pp) grid.newpage() grid.draw(legend) dev.print(tiff, file = paste0("plots/","Legend-Ordination-Fecal-w0-w7-Cecal-vFINAL.tiff"), width = 1.35, height = 6, units = "cm", res=600, compression="lzw") detach("package:cowplot", unload=TRUE) ## No legend in this plot str(p) p <- plot_ordination(rare.fecalw0.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) ## Add centroids #https://stackoverflow.com/questions/23463324/r-add-centroids-to-scatter-plot x <- p$data$NMDS1 y <- p$data$NMDS2 Treatment <- p$data$Treatment df <- data.frame(Treatment, x, y) centroids <- aggregate(cbind(x,y)~Treatment,df,mean) f <- function(z) {sd(z)/sqrt(length(z))} # function to calculate std.err se <- aggregate(cbind(se.x=x,se.y=y)~Treatment,df,f) centroids <- merge(centroids,se, by="Treatment") # add std.err column to centroids str(centroids) ## No legend in this plot & add Centroids pp <- ggplot(data = p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = Treatment) ) + theme_bw() + # #ggtitle("A") + geom_point(size = 1, alpha=0.5) + # was alpha = 0.55 scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + geom_polygon(aes(fill = Treatment), alpha = 0.175, linetype="blank") + # was alpha = 0.25 #geom_point(data = df_centroid, aes(x=NMDS1, y=NMDS2, colour=Treatment), size=8, alpha=0.5, inherit.aes = FALSE, show.legend = FALSE) + # , color=Treatment geom_errorbar(data=centroids,aes(x=x, ymin=y-se.y,ymax=y+se.y, colour=Treatment), size=0.65, width=NA, inherit.aes = FALSE) + # ,width=0.1 geom_errorbarh(data=centroids,aes(y=y, xmin=x-se.x,xmax=x+se.x, colour=Treatment),size=0.65, height=NA, inherit.aes = FALSE) + # ,height=0.05 #geom_point(data=centroids, aes(x=x, y=y, colour=Treatment), shape=1, size=3.5, stroke=0.65, inherit.aes = FALSE) + # geom_point(data=centroids, aes(x=x, y=y, colour=Treatment), shape=1, size=4, stroke=0.65, inherit.aes = FALSE) + # scale_fill_manual(values = cols) + guides( shape = guide_legend(ncol = 2) ) + #annotate(geom="text", x= -3.5, y= -0.6, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + annotate(geom="text", x= -3.5, y= -0.595, label = paste0("Week 0 faeces\nStress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + labs(x = NULL, y = NULL) + theme( #plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), plot.margin = margin(t = 2, r = 1, b = 2, l = 1, "pt"), plot.title = element_text(margin=margin(b = 3, unit = "pt")), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), #legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), #legend.key.size = unit(2, "pt"), #legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.5)), #legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), #legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), #legend.box.spacing = unit(2, "pt"), legend.position = "none" ) pp str(pp) grid.text(label = "A" , x = unit(0.095, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week0-only-1xRarefy-by-Treatment-and-Cage-NO-LEGEND-CENTROIDS-A-vFINAL.tiff"), width = 4.83, height = 6, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by Litter # Calculate bray curtis distance matrix set.seed(123) bray.rare.fecalw0.dc.16s <- phyloseq::distance(rare.fecalw0.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.fecalw0.dc.16s)) str(sampledf) # Adonis test set.seed(123) adonis(bray.rare.fecalw0.dc.16s ~ Litter + Treatment, data = sampledf) # Call: # adonis(formula = bray.rare.fecalw0.dc.16s ~ Litter + Treatment, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Litter 14 5.7600 0.41143 3.3948 0.54302 0.001 *** # Treatment 2 0.3632 0.18161 1.4986 0.03424 0.072 . # Residuals 37 4.4841 0.12119 0.42274 # Total 53 10.6073 1.00000 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## Litter is significant. Treatment is not at 0.05 significance level # For Litter only set.seed(123) adonis(bray.rare.fecalw0.dc.16s ~ Litter, data = sampledf) # Call: # adonis(formula = bray.rare.fecalw0.dc.16s ~ Litter, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Litter 14 5.7600 0.41143 3.3102 0.54302 0.001 *** # Residuals 39 4.8473 0.12429 0.45698 # Total 53 10.6073 1.00000 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # Are faecal week 0 microbiota associated with gender? set.seed(123) adonis(bray.rare.fecalw0.dc.16s ~ Litter + Sex, data = sampledf) # Call: # adonis(formula = bray.rare.fecalw0.dc.16s ~ Litter + Sex, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Litter 14 5.7600 0.41143 3.3300 0.54302 0.001 *** # Sex 1 0.1524 0.15239 1.2334 0.01437 0.258 # Residuals 38 4.6949 0.12355 0.44261 # Total 53 10.6073 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # Sex is not significant - so this is not a valid model # Adonis test set.seed(123) adonis(bray.rare.fecalw0.dc.16s ~ Treatment, data = sampledf) # Homogeneity of dispersion test beta <- betadisper(bray.rare.fecalw0.dc.16s, sampledf$Litter) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 14 0.22808 0.016291 1.0464 999 0.407 # Residuals 39 0.60721 0.015569 table(rare.fecalw0.dc.16s@sam_data$Treatment) # Control High Low # 18 18 18 table(rare.fecalw0.dc.16s@sam_data$Sex) # female male # 27 27 table(rare.fecalw0.dc.16s@sam_data$Litter) # Blue 20 Blue 24 Blue 27 Green 13 Green 14 Green 18 Green 21 Green 30 Orange 15 Orange 19 Pink 23 Pink 24 Purple 26 # 5 2 2 4 2 4 1 2 7 5 4 6 4 # Red 22 Red 23 # 2 4 summary(as.numeric(table(rare.fecalw0.dc.16s@sam_data$Litter))) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1.0 2.0 4.0 3.6 4.5 7.0 # i.e. median 4, IQR 2-4.5 #------------------------ #### Ordination of fecal Week 7 only samples #------------------------ unique(clean.16s@sam_data$samp_type) # "cecal" "fecal" "air" "soil" "fresh bedding" fecalw7.dc.16s <- prune_samples( clean.16s@sam_data$samp_type == "fecal" & clean.16s@sam_data$Time == "Week 7", clean.16s ) fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(fecalw7.dc.16s)) # 14195 min(taxa_sums(fecalw7.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw7.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw7.dc.16s) > 0, x = fecalw7.dc.16s ) fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 645 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 645 taxa by 7 taxonomic ranks ] table(fecalw7.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 table( fecalw7.dc.16s@sam_data$Treatment ) # Control High Low # 18 18 17 table( fecalw7.dc.16s@sam_data$Time ) # Week 7 # 53 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.fecalw7.dc.16s <- rarefy_even_depth(fecalw7.dc.16s, sample.size = min(sample_sums(fecalw7.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.fecalw7.dc.16s) # all 14195 ntaxa(rare.fecalw7.dc.16s) # 576 nsamples(rare.fecalw7.dc.16s) # 53 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.fecalw7.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.1577653 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.fecalw7.dc.16s@sam_data$samp_type # all "fecal" rare.fecalw7.dc.16s@sam_data$Time # all "Week 7" length(unique(rare.fecalw7.dc.16s@sam_data$Litter )) # 15 length(unique(rare.fecalw7.dc.16s@sam_data$Cage.Name )) # 18 p <- plot_ordination(rare.fecalw7.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) p$data$Sex cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + geom_polygon(aes(fill = Treatment), alpha = 0.3) + annotate(geom="text", x= 0.43, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) # + pp ## Minimise plot size & ## No legend in this plot p <- plot_ordination(rare.fecalw7.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) #https://stackoverflow.com/questions/23463324/r-add-centroids-to-scatter-plot x <- p$data$NMDS1 y <- p$data$NMDS2 Treatment <- p$data$Treatment df <- data.frame(Treatment, x, y) centroids <- aggregate(cbind(x,y)~Treatment,df,mean) f <- function(z) {sd(z)/sqrt(length(z))} # function to calculate std.err se <- aggregate(cbind(se.x=x,se.y=y)~Treatment,df,f) centroids <- merge(centroids,se, by="Treatment") # add std.err column to centroids str(centroids) ## No legend in this plot & add Centroids pp <- ggplot(data=p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = Treatment)) + theme_bw() + #ggtitle("B") + geom_point(size = 1, alpha=0.5) + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + geom_polygon(aes(fill = Treatment), alpha = 0.175, linetype="blank") + geom_errorbar(data=centroids,aes(x=x, ymin=y-se.y,ymax=y+se.y, colour=Treatment), size=0.65, width=NA, inherit.aes = FALSE) + # ,width=0.1 geom_errorbarh(data=centroids,aes(y=y, xmin=x-se.x,xmax=x+se.x, colour=Treatment),size=0.65, height=NA, inherit.aes = FALSE) + # ,height=0.05 geom_point(data=centroids, aes(x=x, y=y, colour=Treatment), shape=1, size=4, stroke=0.65, inherit.aes = FALSE) + # scale_fill_manual(values = cols) + guides( shape = guide_legend(ncol = 2) ) + #annotate(geom="text", x= -0.4, y= -0.42, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + annotate(geom="text", x= -0.42, y= -0.44, label = paste0("Week 7 faeces\nStress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + labs(x = NULL, y = NULL) + theme( #plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), plot.margin = margin(t = 2, r = 1, b = 2, l = 1, "pt"), plot.title = element_text(margin=margin(b = 3, unit = "pt")), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), # legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), # legend.key.size = unit(2, "pt"), # # legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.5)), # legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), # legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), # legend.box.spacing = unit(2, "pt"), legend.position = "none" ) pp grid.text(label = "B" , x = unit(0.095, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week7-only-1xRarefy-by-Treatment-and-Cage-NO-LEGEND-CENTROIDS-B-vFINAL.tiff"), width = 4.83, height = 6, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.fecalw7.dc.16s <- phyloseq::distance(rare.fecalw7.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.fecalw7.dc.16s)) str(sampledf) # Adonis test set.seed(123) adonis(bray.rare.fecalw7.dc.16s ~ Treatment + Sex + Litter + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.fecalw7.dc.16s ~ Treatment + Sex + Litter + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Treatment 2 0.3005 0.15027 2.4955 0.05452 0.020 * # Sex 1 0.3510 0.35098 5.8289 0.06367 0.002 ** # Litter 14 1.6480 0.11772 1.9549 0.29895 0.001 *** # Cage.Name 14 1.9486 0.13918 2.3114 0.35347 0.001 *** # Residuals 21 1.2645 0.06021 0.22938 # Total 52 5.5126 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # Homogeneity of dispersion test beta <- betadisper(bray.rare.fecalw7.dc.16s, sampledf$Treatment) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 2 0.004422 0.0022111 0.4247 999 0.66 # Residuals 50 0.260341 0.0052068 table(rare.fecalw7.dc.16s@sam_data$Treatment) # Control High Low # 18 18 17 table(rare.fecalw7.dc.16s@sam_data$Sex) # female male # 26 27 x<- table(rare.fecalw7.dc.16s@sam_data$Litter) x # Blue 20 Blue 24 Blue 27 Green 13 Green 14 Green 18 Green 21 Green 30 Orange 15 Orange 19 Pink 23 # 5 2 1 4 2 4 1 2 7 5 4 # Pink 24 Purple 26 Red 22 Red 23 # 6 4 2 4 hist( as.numeric(x) ) summary( as.numeric(x) ) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1.000 2.000 4.000 3.533 4.500 7.000 table(rare.fecalw7.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 # #------------------------ #### Ordination of cecal only samples #------------------------ unique(clean.16s@sam_data$samp_type) # "cecal" "fecal" "air" "soil" "fresh bedding" cecal.dc.16s <- prune_samples( clean.16s@sam_data$samp_type == "cecal", clean.16s ) cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(cecal.dc.16s)) # 14632 min(taxa_sums(cecal.dc.16s)) # 0 # prune taxa that have zero sequence reads cecal.dc.16s <- prune_taxa( taxa = taxa_sums(cecal.dc.16s) > 0, x = cecal.dc.16s ) cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 550 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 550 taxa by 7 taxonomic ranks ] table(cecal.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 table( cecal.dc.16s@sam_data$Treatment ) # Control High Low # 18 18 17 table( cecal.dc.16s@sam_data$Time ) # Post-exposure # 53 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.cecal.dc.16s <- rarefy_even_depth(cecal.dc.16s, sample.size = min(sample_sums(cecal.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.cecal.dc.16s) # all 14632 ntaxa(rare.cecal.dc.16s) # 496 nsamples(rare.cecal.dc.16s) # 53 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.cecal.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.2012916 # Stress type 1, weak ties # No convergent solutions - best solution after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.cecal.dc.16s@sam_data$samp_type # all "cecal" rare.cecal.dc.16s@sam_data$Time # all "Post-exposure" length(unique(rare.cecal.dc.16s@sam_data$Litter )) # 15 length(unique(rare.cecal.dc.16s@sam_data$Cage.Name )) # 18 p <- plot_ordination(rare.cecal.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") p str(p) p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + geom_polygon(aes(fill = Treatment), alpha = 0.3) + annotate(geom="text", x= -0.45, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=0, vjust=1, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) #+ pp ## Minimise plot size & ## Add centroids to this plot p <- plot_ordination(rare.cecal.dc.16s, ord, type="samples", color="Treatment", shape="Cage.Name") p$data$Treatment <- factor(p$data$Treatment, levels = c("Control","Low","High"), ordered = TRUE) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue shapes <- c(1:18) #https://stackoverflow.com/questions/23463324/r-add-centroids-to-scatter-plot x <- p$data$NMDS1 y <- p$data$NMDS2 Treatment <- p$data$Treatment df <- data.frame(Treatment, x, y) centroids <- aggregate(cbind(x,y)~Treatment,df,mean) f <- function(z) {sd(z)/sqrt(length(z))} # function to calculate std.err se <- aggregate(cbind(se.x=x,se.y=y)~Treatment,df,f) centroids <- merge(centroids,se, by="Treatment") # add std.err column to centroids str(centroids) ## No legend in this plot & add Centroids pp <- ggplot(data=p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = Treatment)) + theme_bw() + #ggtitle("C") + geom_point(size = 1, alpha=0.5) + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + geom_polygon(aes(fill = Treatment), alpha = 0.175, linetype="blank") + geom_errorbar(data=centroids,aes(x=x, ymin=y-se.y,ymax=y+se.y, colour=Treatment), size=0.65, width=NA, inherit.aes = FALSE) + # ,width=0.1 geom_errorbarh(data=centroids,aes(y=y, xmin=x-se.x,xmax=x+se.x, colour=Treatment),size=0.65, height=NA, inherit.aes = FALSE) + # ,height=0.05 geom_point(data=centroids, aes(x=x, y=y, colour=Treatment), shape=1, size=4, stroke=0.65, inherit.aes = FALSE) + # scale_fill_manual(values = cols) + guides( shape = guide_legend(ncol = 2) ) + #annotate(geom="text", x= -0.47, y= -0.43, label = paste0("Stress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + annotate(geom="text", x= -0.485, y= -0.425, label = paste0("Caeca\nStress = ",round(ord$stress,4)), hjust=0, vjust=0, size=2.5 ) + labs(x = NULL, y = NULL) + theme( #plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), plot.margin = margin(t = 2, r = 1, b = 2, l = 1, "pt"), plot.title = element_text(margin=margin(b = 3, unit = "pt")), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), # legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), # legend.key.size = unit(2, "pt"), # # legend.text = element_text(margin=margin(t = 0,r = 0,b = 1,l = 0,"pt"), size = rel(0.5)), # legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), # legend.margin = margin(t = 0,r = 0,b = 0,l = 5,"pt"), # legend.box.spacing = unit(2, "pt"), legend.position = "none" ) pp grid.text(label = "C" , x = unit(0.095, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Cecal-only-1xRarefy-NO-LEGEND-CENTROIDS-C-vFINAL.tiff"), width = 4.83, height = 6, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.cecal.dc.16s <- phyloseq::distance(rare.cecal.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.cecal.dc.16s)) str(sampledf) # Adonis test set.seed(123) adonis(bray.rare.cecal.dc.16s ~ Treatment + Sex + Litter + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.cecal.dc.16s ~ Treatment + Sex + Litter + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # Treatment 2 0.1912 0.09559 2.4672 0.04201 0.007 ** # Sex 1 0.4075 0.40752 10.5179 0.08955 0.001 *** # Litter 14 1.7985 0.12846 3.3156 0.39520 0.001 *** # Cage.Name 14 1.3399 0.09571 2.4703 0.29444 0.001 *** # Residuals 21 0.8136 0.03875 0.17879 # Total 52 4.5508 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # Homogeneity of dispersion test beta <- betadisper(bray.rare.cecal.dc.16s, sampledf$Treatment) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 2 0.019532 0.0097661 1.9675 999 0.142 # Residuals 50 0.248184 0.0049637 table( rare.cecal.dc.16s@sam_data$Treatment) # Control High Low # 18 18 17 table( rare.cecal.dc.16s@sam_data$Sex) # female male # 27 26 table( rare.cecal.dc.16s@sam_data$Litter) # Blue 20 Blue 24 Blue 27 Green 13 Green 14 Green 18 Green 21 Green 30 Orange 15 Orange 19 Pink 23 # 5 2 2 4 2 4 1 2 6 5 4 # Pink 24 Purple 26 Red 22 Red 23 # 6 4 2 4 summary( as.numeric( table( rare.cecal.dc.16s@sam_data$Litter) )) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1.000 2.000 4.000 3.533 4.500 6.000 table( rare.cecal.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 # #------------------------ #### Identify trending taxa within treatment groups # Differential abundance Wk 0 > Wk 7 separately within Treatments #------------------------ # 1. Separately within each treatment: # a. determine differentially abundant taxa (using DESEq2 with alpha = 0.05) between Week 0 to Week 7. # b. from Week 7 samples, filter taxa to retain only significantly differentially abundant taxa that are increasing between Week 0 to Week 7. # 2. Merge the filtered Week 7 samples # 3. Normalise # 4. Heatmap - to visualise significant differentially abundant increasing taxa in each treatment ## First isolate fecal samples from Week 0 and 7 fecal.w0w7.dc.16s <- prune_samples( clean.16s@sam_data$samp_type == "fecal", clean.16s ) fecal.w0w7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 107 samples ] # sample_data() Sample Data: [ 107 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(fecal.w0w7.dc.16s)) # 11450 min(taxa_sums(fecal.w0w7.dc.16s)) # 0 # prune taxa that have zero sequence reads fecal.w0w7.dc.16s <- prune_taxa(taxa = taxa_sums(fecal.w0w7.dc.16s) > 0, x = fecal.w0w7.dc.16s) fecal.w0w7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 107 samples ] # sample_data() Sample Data: [ 107 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] ## focus on samples that can be traced from Week 0 to Week 7 table(fecal.w0w7.dc.16s@sam_data$Time) # Week 0 Week 7 # 54 53 table(fecal.w0w7.dc.16s@sam_data$mouseID) # C1m1 C1m2 C1m3 C2m1 C2m2 C2m3 C3m1 C3m2 C3m3 C4m1 C4m2 C4m3 C5m1 C5m2 C5m3 C6m1 C6m2 C6m3 H1m1 H1m2 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 # H1m3 H2m1 H2m2 H2m3 H3m1 H3m2 H3m3 H4m1 H4m2 H4m3 H5m1 H5m2 H5m3 H6m1 H6m2 H6m3 L1m1 L1m2 L1m3 L2m1 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 # L2m2 L2m3 L3m1 L3m2 L3m3 L4m1 L4m2 L4m3 L5m1 L5m2 L5m3 L6m1 L6m2 L6m3 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ## note mouseID: L1m2 only has one entry mice <- sort( unique(fecal.w0w7.dc.16s@sam_data$mouseID) ) mice # [1] "C1m1" "C1m2" "C1m3" "C2m1" "C2m2" "C2m3" "C3m1" "C3m2" "C3m3" "C4m1" "C4m2" "C4m3" "C5m1" "C5m2" # [15] "C5m3" "C6m1" "C6m2" "C6m3" "H1m1" "H1m2" "H1m3" "H2m1" "H2m2" "H2m3" "H3m1" "H3m2" "H3m3" "H4m1" # [29] "H4m2" "H4m3" "H5m1" "H5m2" "H5m3" "H6m1" "H6m2" "H6m3" "L1m1" "L1m2" "L1m3" "L2m1" "L2m2" "L2m3" # [43] "L3m1" "L3m2" "L3m3" "L4m1" "L4m2" "L4m3" "L5m1" "L5m2" "L5m3" "L6m1" "L6m2" "L6m3" sel <- which(mice == "L1m2") # 38 mice2 <- mice[-sel] phy_obj <- fecal.w0w7.dc.16s phy_obj <- prune_samples( phy_obj@sam_data$mouseID %in% mice2, phy_obj ) phy_obj # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 106 samples ] # sample_data() Sample Data: [ 106 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] min(sample_sums(phy_obj)) # 11450 min(taxa_sums(phy_obj)) # 1 ## 1. Separately within each treatment: ## a. determine differentially abundant taxa (using DESEq2 with alpha = 0.05) between Week 0 to Week 7. ### Control samples # prune to control samples only phy_obj.cont <- prune_samples( phy_obj@sam_data$Treatment == "Control", phy_obj ) phy_obj.cont # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 36 samples ] # sample_data() Sample Data: [ 36 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] min(sample_sums(phy_obj.cont)) # 20718 min(taxa_sums(phy_obj.cont)) # 0 # prune taxa that have zero sequence reads phy_obj.cont <- prune_taxa(taxa = taxa_sums(phy_obj.cont) > 0, x = phy_obj.cont) phy_obj.cont # phyloseq-class experiment-level object # otu_table() OTU Table: [ 704 taxa and 36 samples ] # sample_data() Sample Data: [ 36 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 704 taxa by 7 taxonomic ranks ] min(taxa_sums(phy_obj.cont)) # 1 table( phy_obj.cont@sam_data$Time ) # Week 0 Week 7 # 18 18 # reformat to avoid errors sel<- which(phy_obj.cont@sam_data$Time == "Week 0") phy_obj.cont@sam_data$Time[sel] <- "Week_0" sel<- which(phy_obj.cont@sam_data$Time == "Week 7") phy_obj.cont@sam_data$Time[sel] <- "Week_7" # convert to DESeq dataset phy_obj.cont.deseq <- phyloseq_to_deseq2(phy_obj.cont, ~ Time) # Differential expression analysis based on the Negative Binomial (a.k.a. Gamma-Poisson) distribution #http://rstudio-pubs-static.s3.amazonaws.com/218660_7cbe1343ac434dbab59924a80ac2e815.html # Experiments without replicates do not allow for estimation of the dispersion of counts # around the expected value for each group, which is critical for differential expression # analysis. If an experimental design is supplied which does not contain the necessary # degrees of freedom for differential analysis, DESeq will provide a message to the user # and follow the strategy outlined in Anders and Huber (2010) under the section 'Working # without replicates', wherein all the samples are considered as replicates of a single # group for the estimation of dispersion. As noted in the reference above: "Some # overestimation of the variance may be expected, which will make that approach conservative." # Furthermore, "while one may not want to draw strong conclusions from such an analysis, # it may still be useful for exploration and hypothesis generation." deseq.cont <- DESeq(phy_obj.cont.deseq, test="Wald", fitType="parametric") # estimating size factors # estimating dispersions # gene-wise dispersion estimates # mean-dispersion relationship # final dispersion estimates # fitting model and testing # -- replacing outliers and refitting for 75 genes # -- DESeq argument 'minReplicatesForReplace' = 7 # -- original counts are preserved in counts(dds) # estimating dispersions # fitting model and testing alpha <- 0.05 res <- results(deseq.cont) sigtab.cont <- res[which(res$padj < alpha), ] sigtab.cont <- cbind(as(sigtab.cont, "data.frame"), as(tax_table(phy_obj.cont)[rownames(sigtab.cont), ], "matrix")) dim(sigtab.cont) # 75 13 names(sigtab.cont) # [1] "baseMean" "log2FoldChange" "lfcSE" "stat" "pvalue" # [6] "padj" "Kingdom" "Phylum" "Class" "Order" # [11] "Family" "Genus" "Species" sigtab.cont[ order(sigtab.cont$log2FoldChange) ,c( "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "Phylum", "Genus", "Species" )] # baseMean log2FoldChange lfcSE stat pvalue padj Phylum Genus Species # OTU_112 18.5428583 -4.2506642 1.2351867 -3.441313 5.788981e-04 8.236892e-03 p__Proteobacteria g__Desulfovibrio s__unknown # OTU_45 40.3846440 -4.1297206 1.1793518 -3.501687 4.623226e-04 7.299361e-03 p__Firmicutes g__Lactobacillus s__reuteri # OTU_3988 1.3771808 -3.6669988 1.2170888 -3.012926 2.587418e-03 2.431197e-02 p__Bacteroidetes g__Bacteroides s__unknown # OTU_2913 23.5846873 -3.3053404 0.9544437 -3.463107 5.339762e-04 7.821180e-03 p__Bacteroidetes g__Bacteroides s__unknown # OTU_3777 5.5509050 -3.0515944 0.8422895 -3.622976 2.912332e-04 4.834472e-03 p__Bacteroidetes g__unknown s__unknown # OTU_3406 12.1631678 -2.1689781 0.6482876 -3.345704 8.207402e-04 1.048022e-02 p__Bacteroidetes g__unknown s__unknown # OTU_5415 2.4980769 -2.1436782 0.6142998 -3.489628 4.836926e-04 7.299361e-03 p__Bacteroidetes g__unknown s__unknown # OTU_2 2212.9213516 -2.0302391 0.4804141 -4.226019 2.378620e-05 7.897017e-04 p__Firmicutes g__Lactobacillus s__unknown # OTU_7 1161.8179196 -1.8145545 0.6380401 -2.843950 4.455799e-03 3.522203e-02 p__Bacteroidetes g__Bacteroides s__unknown # OTU_142 29.7555608 -1.7881287 0.6499943 -2.750991 5.941524e-03 4.226970e-02 p__Bacteroidetes g__unknown s__unknown # OTU_81 79.1297351 -1.6301467 0.5070926 -3.214692 1.305844e-03 1.445134e-02 p__Firmicutes g__unknown s__unknown # OTU_7626 3.5714560 -1.4922851 0.5380635 -2.773437 5.546763e-03 4.062188e-02 p__Bacteroidetes g__unknown s__unknown # OTU_127 105.0690016 -1.4610754 0.5056610 -2.889437 3.859325e-03 3.150728e-02 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_2846 7.3957390 -1.3650226 0.4856221 -2.810874 4.940715e-03 3.785348e-02 p__Bacteroidetes g__unknown s__unknown # OTU_2084 37.3172508 -1.2778829 0.3445784 -3.708540 2.084577e-04 3.720854e-03 p__Bacteroidetes g__unknown s__unknown # OTU_16 702.2457975 -1.2324181 0.4121339 -2.990335 2.786720e-03 2.569975e-02 p__Bacteroidetes g__unknown s__unknown # OTU_2041 110.8760392 -1.0220104 0.3225915 -3.168125 1.534255e-03 1.625657e-02 p__Bacteroidetes g__unknown s__unknown # OTU_2544 89.9194171 -0.9752328 0.3182380 -3.064476 2.180516e-03 2.129209e-02 p__Bacteroidetes g__unknown s__unknown # OTU_3242 69.9050764 -0.9723833 0.3448169 -2.819999 4.802385e-03 3.736856e-02 p__Bacteroidetes g__unknown s__unknown # OTU_22 391.2760216 -0.7896047 0.2751368 -2.869862 4.106508e-03 3.298453e-02 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_15 507.2138786 1.3323698 0.4779238 2.787829 5.306259e-03 3.944055e-02 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_19 375.9962846 1.4640192 0.4986532 2.935947 3.325314e-03 2.760011e-02 p__Bacteroidetes g__Bacteroides s__unknown # OTU_101 18.0828765 1.5300412 0.5194369 2.945577 3.223532e-03 2.760011e-02 p__Firmicutes g__Oscillospira s__unknown # OTU_133 9.5817369 1.6039718 0.5185185 3.093374 1.978945e-03 1.971029e-02 p__Firmicutes g__unknown s__unknown # OTU_59 53.7704254 1.6885705 0.5727061 2.948407 3.194167e-03 2.760011e-02 p__Firmicutes g__Butyricicoccus s__pullicaecorum # OTU_254 7.7712083 1.7280668 0.5481499 3.152544 1.618543e-03 1.679238e-02 p__Firmicutes g__Ruminococcus s__unknown # OTU_46 57.3750818 1.7748147 0.5483134 3.236862 1.208519e-03 1.399634e-02 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_402 3.4796363 1.9149560 0.6935720 2.761005 5.762371e-03 4.158929e-02 p__Firmicutes g__Coprococcus s__unknown # OTU_67 75.5550603 2.0057828 0.4965681 4.039290 5.361318e-05 1.570551e-03 p__Firmicutes g__Oscillospira s__unknown # OTU_531 2.9537727 2.1527823 0.7845156 2.744091 6.067866e-03 4.256053e-02 p__Firmicutes g__unknown s__unknown # OTU_201 4.4475730 2.1689741 0.7278070 2.980150 2.881072e-03 2.608680e-02 p__Firmicutes g__unknown s__unknown # OTU_2760 26.4827857 2.3166227 0.6798295 3.407652 6.552433e-04 8.819221e-03 p__Firmicutes g__unknown s__unknown # OTU_57 23.9227294 2.4104822 0.7350809 3.279207 1.040993e-03 1.264426e-02 p__Firmicutes g__unknown s__unknown # OTU_2998 105.2135832 2.4653336 0.8322969 2.962084 3.055641e-03 2.717338e-02 p__Firmicutes g__unknown s__unknown # OTU_7119 0.7424765 2.5212914 0.9366481 2.691824 7.106252e-03 4.847827e-02 p__Bacteroidetes g__unknown s__unknown # OTU_616 104.8140239 2.5436940 0.9492582 2.679665 7.369584e-03 4.959286e-02 p__Firmicutes g__unknown s__unknown # OTU_9505 6.1165439 2.6810103 0.8391426 3.194940 1.398600e-03 1.514136e-02 p__Firmicutes g__unknown s__unknown # OTU_3795 2.8553525 2.6905668 0.7973955 3.374194 7.403224e-04 9.702120e-03 p__Firmicutes g__Oscillospira s__unknown # OTU_202 1.9910979 2.7181841 1.0160733 2.675185 7.468804e-03 4.959286e-02 p__Actinobacteria g__Adlercreutzia s__unknown # OTU_419 2.4862198 2.7388725 1.0089881 2.714474 6.638101e-03 4.591353e-02 p__Firmicutes g__unknown s__unknown # OTU_481 261.4869980 2.7709459 0.9068735 3.055493 2.246906e-03 2.151845e-02 p__Firmicutes g__unknown s__unknown # OTU_130 8.3286306 2.7914313 0.9506076 2.936470 3.319704e-03 2.760011e-02 p__Firmicutes g__unknown s__unknown # OTU_2934 21.1622562 2.8177479 0.7572937 3.720813 1.985826e-04 3.720854e-03 p__Firmicutes g__unknown s__unknown # OTU_4034 5.5412380 3.0181838 0.7191245 4.197025 2.704437e-05 8.417559e-04 p__Firmicutes g__unknown s__unknown # OTU_125 10.4219583 3.1050028 0.7761124 4.000713 6.315199e-05 1.736443e-03 p__Firmicutes g__Ruminococcus s__unknown # OTU_6190 3.0942726 3.1125500 0.9670290 3.218673 1.287854e-03 1.445134e-02 p__Firmicutes g__Clostridium s__methylpentosum # OTU_2039 17.0873664 3.1661525 0.6770033 4.676717 2.915045e-06 1.451693e-04 p__Firmicutes g__unknown s__unknown # OTU_4702 1.7382488 3.2092892 0.8192252 3.917469 8.948355e-05 2.025582e-03 p__Firmicutes g__unknown s__unknown # OTU_2419 467.2176794 3.2674696 0.8215539 3.977182 6.973668e-05 1.736443e-03 p__Firmicutes g__unknown s__unknown # OTU_4387 4.1987166 3.3557159 0.9088920 3.692096 2.224139e-04 3.819384e-03 p__Firmicutes g__unknown s__unknown # OTU_10092 2.5731796 3.4105243 0.8955341 3.808369 1.398865e-04 3.028847e-03 p__Firmicutes g__unknown s__unknown # OTU_258 1.5877156 3.4238107 1.2272272 2.789875 5.272837e-03 3.944055e-02 p__Firmicutes g__Clostridium s__unknown # OTU_64 53.9786644 3.5316462 0.9525336 3.707634 2.092046e-04 3.720854e-03 p__Firmicutes g__unknown s__unknown # OTU_247 16.8095021 3.6028986 1.1479381 3.138583 1.697670e-03 1.725387e-02 p__Firmicutes g__unknown s__unknown # OTU_4483 3.0041736 3.6683929 0.9203247 3.985977 6.720307e-05 1.736443e-03 p__Firmicutes g__unknown s__unknown # OTU_5971 4.6750189 3.7687244 1.0778169 3.496628 4.711792e-04 7.299361e-03 p__Firmicutes g__unknown s__unknown # OTU_2384 54.3258759 3.7942495 0.8950296 4.239245 2.242731e-05 7.897017e-04 p__Firmicutes g__unknown s__unknown # OTU_9762 34.0383854 3.8087799 0.7509148 5.072186 3.932710e-07 2.448112e-05 p__Firmicutes g__unknown s__unknown # OTU_23 110.3286306 3.9069377 0.6742409 5.794572 6.849552e-09 6.822154e-07 p__Proteobacteria g__Desulfovibrio s__C21_c20 # OTU_1 4960.9791929 3.9646628 0.8359561 4.742669 2.109211e-06 1.167097e-04 p__Firmicutes g__unknown s__unknown # OTU_6689 2.8031402 4.0458498 1.0898211 3.712398 2.053047e-04 3.720854e-03 p__Firmicutes g__unknown s__unknown # OTU_3671 7.9000771 4.0476107 0.7574228 5.343925 9.095504e-08 7.549268e-06 p__Firmicutes g__unknown s__unknown # OTU_273 3.1038213 4.0554291 1.0236007 3.961925 7.434788e-05 1.763107e-03 p__Firmicutes g__unknown s__unknown # OTU_169 4.4814470 4.0993668 0.9642642 4.251290 2.125426e-05 7.897017e-04 p__TM7 g__unknown s__unknown # OTU_4077 1.9747873 4.1112769 1.0959598 3.751302 1.759183e-04 3.650305e-03 p__Firmicutes g__unknown s__unknown # OTU_8231 2.1422748 4.2799254 1.2826220 3.336856 8.473174e-04 1.054910e-02 p__Bacteroidetes g__unknown s__unknown # OTU_5280 2.5230003 4.2944680 1.2562631 3.418446 6.297971e-04 8.712194e-03 p__Firmicutes g__unknown s__unknown # OTU_143 12.0926947 4.3528076 1.3387243 3.251459 1.148142e-03 1.361369e-02 p__Firmicutes g__unknown s__unknown # OTU_9785 1.9357390 4.3587136 0.9590896 4.544636 5.503019e-06 2.491367e-04 p__Firmicutes g__unknown s__unknown # OTU_765 29.7740427 4.4883299 0.7312315 6.138042 8.354460e-10 2.080261e-07 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_75 14.8211619 4.6735440 1.0604028 4.407329 1.046531e-05 4.343103e-04 p__Deferribacteres g__Mucispirillum s__schaedleri # OTU_2280 25.3305409 4.8848474 0.9426373 5.182107 2.193932e-07 1.560826e-05 p__Firmicutes g__unknown s__unknown # OTU_4273 6.8839093 5.0918977 0.8206810 6.204478 5.487865e-10 2.080261e-07 p__Firmicutes g__unknown s__unknown # OTU_3188 5.0690465 5.4715911 0.9387125 5.828825 5.581900e-09 6.822154e-07 p__Firmicutes g__unknown s__unknown # OTU_3004 13.0870968 5.9460822 1.0245582 5.803557 6.492250e-09 6.822154e-07 p__Firmicutes g__unknown s__unknown ### Low samples # prune to Low samples only phy_obj.low <- prune_samples( phy_obj@sam_data$Treatment == "Low", phy_obj ) phy_obj.low # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 34 samples ] # sample_data() Sample Data: [ 34 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] min(sample_sums(phy_obj.low)) # 19385 min(taxa_sums(phy_obj.low)) # 0 # prune taxa that have zero sequence reads phy_obj.low <- prune_taxa(taxa = taxa_sums(phy_obj.low) > 0, x = phy_obj.low) phy_obj.low # phyloseq-class experiment-level object # otu_table() OTU Table: [ 608 taxa and 34 samples ] # sample_data() Sample Data: [ 34 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 608 taxa by 7 taxonomic ranks ] table( phy_obj.low@sam_data$Time ) # Week 0 Week 7 # 17 17 # adjust format to avoid errors sel<- which(phy_obj.low@sam_data$Time == "Week 0") phy_obj.low@sam_data$Time[sel] <- "Week_0" sel<- which(phy_obj.low@sam_data$Time == "Week 7") phy_obj.low@sam_data$Time[sel] <- "Week_7" # convert to DESeq dataset phy_obj.low.deseq <- phyloseq_to_deseq2(phy_obj.low, ~ Time) # Deseq2 differntial abundance testing deseq.low <- DESeq(phy_obj.low.deseq, test="Wald", fitType="parametric") # estimating size factors # estimating dispersions # gene-wise dispersion estimates # mean-dispersion relationship # final dispersion estimates # fitting model and testing # -- replacing outliers and refitting for 64 genes # -- DESeq argument 'minReplicatesForReplace' = 7 # -- original counts are preserved in counts(dds) # estimating dispersions # fitting model and testing alpha <- 0.05 res <- results(deseq.low) sigtab.low <- res[which(res$padj < alpha), ] sigtab.low <- cbind(as(sigtab.low, "data.frame"), as(tax_table(phy_obj.low)[rownames(sigtab.low), ], "matrix")) dim(sigtab.low) # 34 13 names(sigtab.low) # [1] "baseMean" "log2FoldChange" "lfcSE" "stat" "pvalue" "padj" "Kingdom" "Phylum" "Class" # [10] "Order" "Family" "Genus" "Species" sigtab.low[ order(sigtab.low$log2FoldChange) ,c( "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "Phylum", "Genus", "Species" )] # baseMean log2FoldChange lfcSE stat pvalue padj Phylum Genus Species # OTU_195 1.798550 -3.998211 1.1824695 -3.381238 7.216003e-04 1.357296e-02 p__Firmicutes g__Streptococcus s__unknown # OTU_127 292.730303 -3.723245 0.6378793 -5.836913 5.317695e-09 5.251224e-07 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_93 24.291110 -3.633685 0.8401941 -4.324816 1.526594e-05 1.108976e-03 p__Bacteroidetes g__Parabacteroides s__unknown # OTU_719 7.689312 -3.179492 0.8619965 -3.688521 2.255612e-04 5.568543e-03 p__Bacteroidetes g__Bacteroides s__unknown # OTU_2164 6.955018 -2.827016 0.7574858 -3.732105 1.898866e-04 5.357514e-03 p__Bacteroidetes g__unknown s__unknown # OTU_142 28.696683 -2.533310 0.5934479 -4.268800 1.965274e-05 1.108976e-03 p__Bacteroidetes g__unknown s__unknown # OTU_7626 5.442626 -2.491337 0.7886089 -3.159154 1.582280e-03 2.334734e-02 p__Bacteroidetes g__unknown s__unknown # OTU_3777 1.945114 -2.484125 0.7890626 -3.148198 1.642806e-03 2.334734e-02 p__Bacteroidetes g__unknown s__unknown # OTU_4743 22.107263 -2.447953 0.5859928 -4.177445 2.948021e-05 1.455586e-03 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_3216 5.400312 -2.444080 0.7475802 -3.269322 1.078056e-03 1.774300e-02 p__Bacteroidetes g__unknown s__unknown # OTU_3118 2.952988 -2.238462 0.6839070 -3.273051 1.063933e-03 1.774300e-02 p__Bacteroidetes g__unknown s__unknown # OTU_104 51.397059 -2.214126 0.6953415 -3.184228 1.451406e-03 2.293221e-02 p__Firmicutes g__unknown s__unknown # OTU_2084 29.008145 -1.742054 0.4074178 -4.275841 1.904168e-05 1.108976e-03 p__Bacteroidetes g__unknown s__unknown # OTU_7522 17.423007 -1.538813 0.4210844 -3.654405 2.577790e-04 5.980094e-03 p__Bacteroidetes g__unknown s__unknown # OTU_22 523.474626 -1.389541 0.3864357 -3.595790 3.234090e-04 6.723503e-03 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_16 411.255144 -1.284971 0.3431829 -3.744274 1.809165e-04 5.357514e-03 p__Bacteroidetes g__unknown s__unknown # OTU_42 62.786175 -1.230551 0.4093512 -3.006100 2.646220e-03 3.371796e-02 p__Bacteroidetes g__unknown s__unknown # OTU_101 14.072656 1.559269 0.5458072 2.856813 4.279183e-03 4.971403e-02 p__Firmicutes g__Oscillospira s__unknown # OTU_2039 13.802541 1.630119 0.5416970 3.009283 2.618654e-03 3.371796e-02 p__Firmicutes g__unknown s__unknown # OTU_59 50.128602 1.925324 0.5014552 3.839473 1.232985e-04 4.058577e-03 p__Firmicutes g__Butyricicoccus s__pullicaecorum # OTU_5079 17.873409 2.105226 0.6045289 3.482424 4.968964e-04 9.813703e-03 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_5095 10.794074 2.345546 0.7570808 3.098145 1.947361e-03 2.652440e-02 p__Firmicutes g__Oscillospira s__unknown # OTU_240 12.184679 2.404578 0.6130899 3.922063 8.779394e-05 3.365317e-03 p__Firmicutes g__Coprococcus s__unknown # OTU_9639 2.130227 2.581988 0.8947042 2.885856 3.903501e-03 4.672373e-02 p__Firmicutes g__unknown s__unknown # OTU_2998 97.930762 2.730555 0.6990120 3.906306 9.371770e-05 3.365317e-03 p__Firmicutes g__unknown s__unknown # OTU_23 90.673046 2.838418 0.8500105 3.339275 8.399749e-04 1.508137e-02 p__Proteobacteria g__Desulfovibrio s__C21_c20 # OTU_5280 2.195330 2.844698 0.7814845 3.640120 2.725106e-04 5.980094e-03 p__Firmicutes g__unknown s__unknown # OTU_8615 1.144007 2.912812 0.9258675 3.146035 1.655001e-03 2.334734e-02 p__Bacteroidetes g__unknown s__unknown # OTU_273 4.573067 3.301106 1.1230775 2.939339 3.289130e-03 4.060020e-02 p__Firmicutes g__unknown s__unknown # OTU_765 34.985087 3.533033 0.5361757 6.589319 4.418481e-11 1.524523e-08 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_4273 6.201800 4.019900 0.6178781 6.505975 7.719105e-11 1.524523e-08 p__Firmicutes g__unknown s__unknown # OTU_2280 4.242216 4.031989 0.9977289 4.041167 5.318584e-05 2.334267e-03 p__Firmicutes g__unknown s__unknown # OTU_332 2.356973 4.722236 1.2764443 3.699523 2.160048e-04 5.568543e-03 p__Firmicutes g__Oscillospira s__unknown # OTU_287 7.096759 5.298301 0.8897829 5.954599 2.607100e-09 3.432681e-07 p__Firmicutes g__unknown s__unknown ### High samples # prune to High samples only phy_obj.high <- prune_samples( phy_obj@sam_data$Treatment == "High", phy_obj ) phy_obj.high # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 36 samples ] # sample_data() Sample Data: [ 36 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] min(sample_sums(phy_obj.high)) # 11450 min(taxa_sums(phy_obj.high)) # 0 # prune taxa that have zero sequence reads phy_obj.high <- prune_taxa(taxa = taxa_sums(phy_obj.high) > 0, x = phy_obj.high) phy_obj.high # phyloseq-class experiment-level object # otu_table() OTU Table: [ 622 taxa and 36 samples ] # sample_data() Sample Data: [ 36 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 622 taxa by 7 taxonomic ranks ] table( phy_obj.high@sam_data$Time ) # Week 0 Week 7 # 18 18 # adjust format to avoid errors sel<- which(phy_obj.high@sam_data$Time == "Week 0") phy_obj.high@sam_data$Time[sel] <- "Week_0" sel<- which(phy_obj.high@sam_data$Time == "Week 7") phy_obj.high@sam_data$Time[sel] <- "Week_7" # convert to DESeq dataset phy_obj.high.deseq <- phyloseq_to_deseq2(phy_obj.high, ~ Time) # Deseq2 differential abundance testing deseq.high <- DESeq(phy_obj.high.deseq, test="Wald", fitType="parametric") # estimating size factors # estimating dispersions # gene-wise dispersion estimates # mean-dispersion relationship # final dispersion estimates # fitting model and testing # -- replacing outliers and refitting for 65 genes # -- DESeq argument 'minReplicatesForReplace' = 7 # -- original counts are preserved in counts(dds) # estimating dispersions # fitting model and testing alpha <- 0.05 res <- results(deseq.high) sigtab.high <- res[which(res$padj < alpha), ] sigtab.high <- cbind(as(sigtab.high, "data.frame"), as(tax_table(phy_obj.high)[rownames(sigtab.high), ], "matrix")) dim(sigtab.high) # 79 13 names(sigtab.high) # [1] "baseMean" "log2FoldChange" "lfcSE" "stat" "pvalue" "padj" "Kingdom" "Phylum" # [9] "Class" "Order" "Family" "Genus" "Species" sigtab.high[ order(sigtab.high$log2FoldChange) ,c( "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "Phylum", "Genus", "Species" )] # baseMean log2FoldChange lfcSE stat pvalue padj Phylum Genus Species # OTU_4681 2.043530 -4.079136 1.3724732 -2.972106 2.957641e-03 1.982544e-02 p__Firmicutes g__unknown s__unknown # OTU_195 0.985394 -3.234906 1.0376998 -3.117381 1.824655e-03 1.349615e-02 p__Firmicutes g__Streptococcus s__unknown # OTU_719 17.246642 -3.005322 0.9063864 -3.315719 9.140776e-04 7.842785e-03 p__Bacteroidetes g__Bacteroides s__unknown # OTU_4086 1.080848 -2.781731 1.0266076 -2.709634 6.735742e-03 3.802149e-02 p__Bacteroidetes g__Bacteroides s__unknown # OTU_3777 2.064294 -2.504568 0.6299288 -3.975954 7.009754e-05 1.073994e-03 p__Bacteroidetes g__unknown s__unknown # OTU_127 148.218087 -2.414155 0.5209501 -4.634139 3.584264e-06 1.098321e-04 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_93 9.876403 -2.101165 0.7811775 -2.689741 7.150755e-03 3.983992e-02 p__Bacteroidetes g__Parabacteroides s__unknown # OTU_7626 4.076220 -2.050871 0.5449074 -3.763705 1.674141e-04 2.052018e-03 p__Bacteroidetes g__unknown s__unknown # OTU_4743 20.534163 -1.986233 0.7036741 -2.822660 4.762710e-03 2.877750e-02 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_2 2851.883968 -1.915679 0.4954935 -3.866204 1.105426e-04 1.529767e-03 p__Firmicutes g__Lactobacillus s__unknown # OTU_21 201.608421 -1.864232 0.6369436 -2.926840 3.424246e-03 2.192539e-02 p__Firmicutes g__unknown s__unknown # OTU_5415 4.801449 -1.722225 0.5339671 -3.225340 1.258232e-03 9.995953e-03 p__Bacteroidetes g__unknown s__unknown # OTU_24 116.627087 -1.616744 0.5253827 -3.077269 2.089066e-03 1.516822e-02 p__Firmicutes g__unknown s__unknown # OTU_3242 68.242178 -1.548283 0.3153554 -4.909645 9.124154e-07 3.558420e-05 p__Bacteroidetes g__unknown s__unknown # OTU_2084 36.388399 -1.536603 0.4028264 -3.814555 1.364284e-04 1.721405e-03 p__Bacteroidetes g__unknown s__unknown # OTU_2846 7.681438 -1.499266 0.4714395 -3.180186 1.471803e-03 1.148007e-02 p__Bacteroidetes g__unknown s__unknown # OTU_22 572.559066 -1.218268 0.3451008 -3.530180 4.152775e-04 4.453852e-03 p__Bacteroidetes g__Bacteroides s__acidifaciens # OTU_2041 106.962104 -1.130011 0.3282132 -3.442918 5.754748e-04 5.647830e-03 p__Bacteroidetes g__unknown s__unknown # OTU_2544 87.397303 -1.023581 0.3613387 -2.832747 4.614986e-03 2.877750e-02 p__Bacteroidetes g__unknown s__unknown # OTU_16 446.315924 -0.958834 0.2786383 -3.441142 5.792646e-04 5.647830e-03 p__Bacteroidetes g__unknown s__unknown # OTU_29 122.220729 1.038246 0.3121266 3.326361 8.798796e-04 7.703436e-03 p__Bacteroidetes g__unknown s__unknown # OTU_60 207.945848 1.307496 0.4943108 2.645089 8.166945e-03 4.434961e-02 p__Firmicutes g__unknown s__unknown # OTU_31 153.055939 1.729748 0.4519056 3.827676 1.293590e-04 1.681666e-03 p__Firmicutes g__unknown s__unknown # OTU_15 585.861078 1.754188 0.5079705 3.453326 5.537193e-04 5.647830e-03 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_105 42.083956 1.761243 0.5893318 2.988543 2.803113e-03 1.913636e-02 p__Bacteroidetes g__unknown s__unknown # OTU_38 127.999115 1.777875 0.5421738 3.279161 1.041164e-03 8.589600e-03 p__Firmicutes g__unknown s__unknown # OTU_46 38.816777 1.845908 0.6227928 2.963920 3.037477e-03 2.004735e-02 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_2750 11.509058 1.944012 0.7034053 2.763715 5.714737e-03 3.358386e-02 p__Bacteroidetes g__unknown s__unknown # OTU_91 11.563760 1.955792 0.5511539 3.548541 3.873720e-04 4.261092e-03 p__Bacteroidetes g__unknown s__unknown # OTU_3456 3.869395 2.047931 0.5256209 3.896212 9.770883e-05 1.397236e-03 p__Bacteroidetes g__unknown s__unknown # OTU_3876 3.314630 2.055860 0.6557929 3.134923 1.718994e-03 1.293769e-02 p__Bacteroidetes g__unknown s__unknown # OTU_556 15.699203 2.076692 0.7620504 2.725138 6.427465e-03 3.676510e-02 p__Firmicutes g__Oscillospira s__unknown # OTU_101 10.908299 2.165301 0.5063512 4.276282 1.900406e-05 3.396976e-04 p__Firmicutes g__Oscillospira s__unknown # OTU_5520 6.523735 2.198683 0.5704933 3.854004 1.162017e-04 1.557829e-03 p__Firmicutes g__Dorea s__unknown # OTU_115 18.198058 2.302950 0.5159827 4.463230 8.073313e-06 2.164657e-04 p__Firmicutes g__Ruminococcus s__unknown # OTU_6509 6.358935 2.362478 0.7086561 3.333743 8.568565e-04 7.658155e-03 p__Bacteroidetes g__unknown s__unknown # OTU_5915 1.177450 2.381407 0.8584033 2.774229 5.533273e-03 3.296908e-02 p__Bacteroidetes g__unknown s__unknown # OTU_2419 259.000746 2.406283 0.7030403 3.422682 6.200654e-04 5.911290e-03 p__Firmicutes g__unknown s__unknown # OTU_4273 5.971702 2.524909 0.8934620 2.825983 4.713571e-03 2.877750e-02 p__Firmicutes g__unknown s__unknown # OTU_4483 2.462432 2.596384 0.9430368 2.753216 5.901292e-03 3.421155e-02 p__Firmicutes g__unknown s__unknown # OTU_9762 30.654538 2.629714 0.8597311 3.058763 2.222526e-03 1.563055e-02 p__Firmicutes g__unknown s__unknown # OTU_64 35.953911 2.692941 0.8328833 3.233275 1.223797e-03 9.905826e-03 p__Firmicutes g__unknown s__unknown # OTU_5765 2.648711 2.697816 0.8207166 3.287146 1.012082e-03 8.513395e-03 p__Firmicutes g__unknown s__unknown # OTU_37 50.345166 2.749703 0.7729584 3.557375 3.745788e-04 4.228797e-03 p__Firmicutes g__unknown s__unknown # OTU_3669 16.180306 2.749773 0.6679435 4.116775 3.842113e-05 6.339486e-04 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_2039 11.747794 2.786055 0.5206581 5.351027 8.745664e-08 4.168766e-06 p__Firmicutes g__unknown s__unknown # OTU_4387 1.369493 2.846796 0.9528173 2.987767 2.810235e-03 1.913636e-02 p__Firmicutes g__unknown s__unknown # OTU_5079 15.740123 2.976611 0.6383231 4.663174 3.113699e-06 1.027521e-04 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_287 4.452519 3.031707 0.8785625 3.450758 5.590145e-04 5.647830e-03 p__Firmicutes g__unknown s__unknown # OTU_109 13.644146 3.062056 0.9138082 3.350873 8.055711e-04 7.352979e-03 p__Firmicutes g__unknown s__unknown # OTU_1 2956.272933 3.082037 0.6992884 4.407391 1.046232e-05 2.406540e-04 p__Firmicutes g__unknown s__unknown # OTU_5375 2.348260 3.158170 1.1166597 2.828230 4.680615e-03 2.877750e-02 p__Firmicutes g__unknown s__unknown # OTU_196 8.286051 3.207144 0.7490111 4.281838 1.853562e-05 3.396976e-04 p__Firmicutes g__unknown s__unknown # OTU_765 32.960477 3.208995 0.7305768 4.392413 1.120997e-05 2.406540e-04 p__Firmicutes g__[Ruminococcus] s__gnavus # OTU_10092 1.804068 3.269152 1.1068356 2.953603 3.140882e-03 2.041573e-02 p__Firmicutes g__unknown s__unknown # OTU_139 9.465414 3.292801 0.7232097 4.553037 5.287690e-06 1.512279e-04 p__Firmicutes g__Anaerotruncus s__unknown # OTU_125 4.681573 3.445857 0.7845344 4.392232 1.121930e-05 2.406540e-04 p__Firmicutes g__Ruminococcus s__unknown # OTU_7377 12.186049 3.469706 0.8587130 4.040588 5.331731e-05 8.471528e-04 p__Bacteroidetes g__unknown s__unknown # OTU_9265 18.151516 3.621745 0.8288836 4.369426 1.245738e-05 2.544865e-04 p__Bacteroidetes g__unknown s__unknown # OTU_75 6.957835 3.654925 1.1894892 3.072684 2.121430e-03 1.516822e-02 p__Deferribacteres g__Mucispirillum s__schaedleri # OTU_6190 2.233411 3.659486 1.0744411 3.405944 6.593580e-04 6.149230e-03 p__Firmicutes g__Clostridium s__methylpentosum # OTU_2998 35.340798 3.670364 0.6399754 5.735164 9.741831e-09 5.970351e-07 p__Firmicutes g__unknown s__unknown # OTU_9505 2.924012 3.906800 0.8962776 4.358917 1.307075e-05 2.548796e-04 p__Firmicutes g__unknown s__unknown # OTU_9785 1.981434 4.044040 1.5080439 2.681646 7.326086e-03 4.029347e-02 p__Firmicutes g__unknown s__unknown # OTU_3408 1.646883 4.059738 1.1208914 3.621884 2.924656e-04 3.485215e-03 p__Firmicutes g__unknown s__unknown # OTU_82 8.882978 4.060109 0.8480131 4.787790 1.686281e-06 6.028454e-05 p__Actinobacteria g__unknown s__unknown # OTU_23 84.607384 4.112387 0.7798565 5.273261 1.340206e-07 5.749486e-06 p__Proteobacteria g__Desulfovibrio s__C21_c20 # OTU_26 215.515354 4.145496 0.9836592 4.214362 2.504851e-05 4.298325e-04 p__Bacteroidetes g__unknown s__unknown # OTU_5790 3.186627 4.174087 1.0696575 3.902265 9.529673e-05 1.397236e-03 p__Firmicutes g__unknown s__unknown # OTU_35 146.533118 4.174864 1.1581124 3.604887 3.122893e-04 3.620868e-03 p__Bacteroidetes g__Prevotella s__unknown # OTU_2760 15.676237 4.256497 0.7123156 5.975577 2.292769e-09 1.937759e-07 p__Firmicutes g__unknown s__unknown # OTU_173 5.366274 4.295784 0.8016890 5.358416 8.395459e-08 4.168766e-06 p__Firmicutes g__unknown s__unknown # OTU_1737 11.412714 4.310080 0.5945543 7.249263 4.190462e-13 8.988542e-11 p__Firmicutes g__unknown s__unknown # OTU_616 59.092899 4.311497 0.6840614 6.302792 2.923319e-10 4.180346e-08 p__Firmicutes g__unknown s__unknown # OTU_481 137.340586 4.322291 0.7266485 5.948256 2.710153e-09 1.937759e-07 p__Firmicutes g__unknown s__unknown # OTU_3188 2.279943 4.371347 1.3849341 3.156358 1.597528e-03 1.223821e-02 p__Firmicutes g__unknown s__unknown # OTU_3004 7.934426 4.524324 1.0239869 4.418342 9.946115e-06 2.406540e-04 p__Firmicutes g__unknown s__unknown # OTU_2280 9.365913 5.649581 0.9478189 5.960612 2.512947e-09 1.937759e-07 p__Firmicutes g__unknown s__unknown # OTU_79 13.129839 23.166880 2.9259099 7.917838 2.416762e-15 1.036791e-12 p__Bacteroidetes g__Prevotella s__unknown ### compare control / low / high scenarios ## Control taxa.sig.diffabun.cont <- row.names(sigtab.cont) length(taxa.sig.diffabun.cont) # 75 # Increasing sel <- which(sigtab.cont$log2FoldChange > 0) # qty 55 taxa.sig.diffabun.Inc.cont <- row.names(sigtab.cont)[sel] taxa.sig.diffabun.Inc.cont # [1] "OTU_1" "OTU_15" "OTU_19" "OTU_23" "OTU_46" "OTU_57" "OTU_59" "OTU_64" "OTU_67" "OTU_75" "OTU_101" # [12] "OTU_125" "OTU_130" "OTU_133" "OTU_143" "OTU_169" "OTU_201" "OTU_202" "OTU_247" "OTU_254" "OTU_258" "OTU_273" # [23] "OTU_402" "OTU_419" "OTU_481" "OTU_531" "OTU_616" "OTU_765" "OTU_2039" "OTU_2280" "OTU_2384" "OTU_2419" "OTU_2760" # [34] "OTU_2934" "OTU_2998" "OTU_3004" "OTU_3188" "OTU_3671" "OTU_3795" "OTU_4034" "OTU_4077" "OTU_4273" "OTU_4387" "OTU_4483" # [45] "OTU_4702" "OTU_5280" "OTU_5971" "OTU_6190" "OTU_6689" "OTU_7119" "OTU_8231" "OTU_9505" "OTU_9762" "OTU_9785" "OTU_10092" # Decreasing sel <- which(sigtab.cont$log2FoldChange < 0) # qty 20 taxa.sig.diffabun.Dec.cont <- row.names(sigtab.cont)[sel] taxa.sig.diffabun.Dec.cont # [1] "OTU_2" "OTU_7" "OTU_16" "OTU_22" "OTU_45" "OTU_81" "OTU_112" "OTU_127" "OTU_142" "OTU_2041" "OTU_2084" "OTU_2544" # [13] "OTU_2846" "OTU_2913" "OTU_3242" "OTU_3406" "OTU_3777" "OTU_3988" "OTU_5415" "OTU_7626" # how many Phyla in increasing? length(unique((sigtab.cont$Phylum[ which(sigtab.cont$log2FoldChange > 0) ]))) # 6 ## Low taxa.sig.diffabun.low <- row.names(sigtab.low) length(taxa.sig.diffabun.low) # 34 # Increasing sel <- which(sigtab.low$log2FoldChange > 0) # qty 17 taxa.sig.diffabun.Inc.low <- row.names(sigtab.low)[sel] taxa.sig.diffabun.Inc.low # [1] "OTU_23" "OTU_59" "OTU_101" "OTU_240" "OTU_273" "OTU_287" "OTU_332" "OTU_765" "OTU_2039" "OTU_2280" "OTU_2998" "OTU_4273" # [13] "OTU_5079" "OTU_5095" "OTU_5280" "OTU_8615" "OTU_9639" # Decreasing sel <- which(sigtab.low$log2FoldChange < 0) # qty 17 taxa.sig.diffabun.Dec.low <- row.names(sigtab.low)[sel] taxa.sig.diffabun.Dec.low # [1] "OTU_16" "OTU_22" "OTU_42" "OTU_93" "OTU_104" "OTU_127" "OTU_142" "OTU_195" "OTU_719" "OTU_2084" "OTU_2164" "OTU_3118" # [13] "OTU_3216" "OTU_3777" "OTU_4743" "OTU_7522" "OTU_7626" # how many Phyla in increasing? length(unique((sigtab.low$Phylum[ which(sigtab.low$log2FoldChange > 0) ]))) # 3 ## High taxa.sig.diffabun.high <- row.names(sigtab.high) length(taxa.sig.diffabun.high) # 79 # Increasing sel <- which(sigtab.high$log2FoldChange > 0) # qty 59 taxa.sig.diffabun.Inc.high <- row.names(sigtab.high)[sel] taxa.sig.diffabun.Inc.high # [1] "OTU_1" "OTU_15" "OTU_23" "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" "OTU_38" "OTU_46" "OTU_60" # [12] "OTU_64" "OTU_75" "OTU_79" "OTU_82" "OTU_91" "OTU_101" "OTU_105" "OTU_109" "OTU_115" "OTU_125" "OTU_139" # [23] "OTU_173" "OTU_196" "OTU_287" "OTU_481" "OTU_556" "OTU_616" "OTU_765" "OTU_1737" "OTU_2039" "OTU_2280" "OTU_2419" # [34] "OTU_2750" "OTU_2760" "OTU_2998" "OTU_3004" "OTU_3188" "OTU_3408" "OTU_3456" "OTU_3669" "OTU_3876" "OTU_4273" "OTU_4387" # [45] "OTU_4483" "OTU_5079" "OTU_5375" "OTU_5520" "OTU_5765" "OTU_5790" "OTU_5915" "OTU_6190" "OTU_6509" "OTU_7377" "OTU_9265" # [56] "OTU_9505" "OTU_9762" "OTU_9785" "OTU_10092" # Decreasing sel <- which(sigtab.high$log2FoldChange < 0) # qty 20 taxa.sig.diffabun.Dec.high <- row.names(sigtab.high)[sel] taxa.sig.diffabun.Dec.high # [1] "OTU_2" "OTU_16" "OTU_21" "OTU_22" "OTU_24" "OTU_93" "OTU_127" "OTU_195" "OTU_719" "OTU_2041" "OTU_2084" "OTU_2544" # [13] "OTU_2846" "OTU_3242" "OTU_3777" "OTU_4086" "OTU_4681" "OTU_4743" "OTU_5415" "OTU_7626" # how many Phyla in increasing? length(unique((sigtab.high$Phylum[ which(sigtab.high$log2FoldChange > 0) ]))) # 5 ### Euler diagrams to show overlaps for Increasing / Decreasing taxa among different Treatments ### Increasing taxa # taxa.sig.diffabun.Inc.cont # taxa.sig.diffabun.Inc.low # taxa.sig.diffabun.Inc.high x = list( "Control" = taxa.sig.diffabun.Inc.cont, "Low" = taxa.sig.diffabun.Inc.low, "High" = taxa.sig.diffabun.Inc.high) overlap.inc <- calculate.overlap(x) as.numeric(lapply(x, FUN = length)) # 55 17 59 - i.e. Control 55, Low 17, High 59 as.numeric(lapply(overlap.inc, FUN = length)) # 7 3 19 2 26 5 31 str(overlap.inc) # List of 7 # $ a5: chr [1:7] "OTU_23" "OTU_101" "OTU_765" "OTU_2039" ... # $ a2: chr [1:3] "OTU_59" "OTU_273" "OTU_5280" # $ a4: chr [1:19] "OTU_1" "OTU_15" "OTU_46" "OTU_64" ... # $ a6: chr [1:2] "OTU_287" "OTU_5079" # $ a1: chr [1:26] "OTU_19" "OTU_57" "OTU_67" "OTU_130" ... # $ a3: chr [1:5] "OTU_240" "OTU_332" "OTU_5095" "OTU_8615" ... # $ a7: chr [1:31] "OTU_26" "OTU_29" "OTU_31" "OTU_35" ... # Control 55, Low 17, High 59 # Control&Low length(which(x[[1]] %in% x[[2]])) # 10 x[[1]][which(x[[1]] %in% x[[2]])] # "OTU_23" "OTU_59" "OTU_101" "OTU_273" "OTU_765" "OTU_2039" "OTU_2280" "OTU_2998" "OTU_4273" "OTU_5280" # Control&High length(which(x[[1]] %in% x[[3]])) # 26 # Low&High length(which(x[[2]] %in% x[[3]])) # 9 # Control&Low&High sel <- which(x[[1]] %in% x[[2]]) length(which(x[[1]][sel] %in% x[[3]])) # 7 ## use 'eulerr' package combo <- c("Control" = 55-(10-7)-(26-7)-7, "Low" = 17-(10-7)-(9-7)-7, "High" = 59-(26-7)-(9-7)-7, "Control&Low" = 10-7, "Control&High" = 26-7, "Low&High" = 9-7, "Control&Low&High" = 7) fit.circ.inc <- euler(combo) fit.circ.inc plot(fit.circ.inc) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue plot(fit.circ.inc, quantities = TRUE, fills = list(fill = cols, alpha = 0.3) ) # https://stat.ethz.ch/R-manual/R-devel/library/grid/html/unit.html grid.text(label = "A", x = unit(0.05, "npc") , y = unit(0.95,"npc"), gp=gpar(fontsize=16, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Euler-diagram-Increasing-diff-abun-taxa-by-Treatments-vFINAL-A.tif"), width = 7, height = 10, units = "cm", res=600, compression="lzw") ## taxa that are only increasing in High: overlap.inc$a7 # [1] "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" "OTU_38" "OTU_60" "OTU_79" "OTU_82" "OTU_91" "OTU_105" # [12] "OTU_109" "OTU_115" "OTU_139" "OTU_173" "OTU_196" "OTU_556" "OTU_1737" "OTU_2750" "OTU_3408" "OTU_3456" "OTU_3669" # [23] "OTU_3876" "OTU_5375" "OTU_5520" "OTU_5765" "OTU_5790" "OTU_5915" "OTU_6509" "OTU_7377" "OTU_9265" write.csv(overlap.inc$a7,file="SigDiffAbun-High-only-vFINAL1.csv", row.names = FALSE) sel <- which( !(x[[3]] %in% x[[1]]) & !(x[[3]] %in% x[[2]])) x[[3]][sel] # [1] "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" "OTU_38" "OTU_60" "OTU_79" "OTU_82" "OTU_91" "OTU_105" # [12] "OTU_109" "OTU_115" "OTU_139" "OTU_173" "OTU_196" "OTU_556" "OTU_1737" "OTU_2750" "OTU_3408" "OTU_3456" "OTU_3669" # [23] "OTU_3876" "OTU_5375" "OTU_5520" "OTU_5765" "OTU_5790" "OTU_5915" "OTU_6509" "OTU_7377" "OTU_9265" ## taxa that are only increasing in Low: sel <- which( !(x[[2]] %in% x[[1]]) & !(x[[2]] %in% x[[3]])) x[[2]][sel] # [1] "OTU_240" "OTU_332" "OTU_5095" "OTU_8615" "OTU_9639" overlap.inc$a3 # [1] "OTU_240" "OTU_332" "OTU_5095" "OTU_8615" "OTU_9639" ## taxa that are only increasing in Control: sel <- which( !(x[[1]] %in% x[[2]]) & !(x[[1]] %in% x[[3]])) x[[1]][sel] # [1] "OTU_19" "OTU_57" "OTU_67" "OTU_130" "OTU_133" "OTU_143" "OTU_169" "OTU_201" "OTU_202" "OTU_247" "OTU_254" # [12] "OTU_258" "OTU_402" "OTU_419" "OTU_531" "OTU_2384" "OTU_2934" "OTU_3671" "OTU_3795" "OTU_4034" "OTU_4077" "OTU_4702" # [23] "OTU_5971" "OTU_6689" "OTU_7119" "OTU_8231" overlap.inc$a1 # [1] "OTU_19" "OTU_57" "OTU_67" "OTU_130" "OTU_133" "OTU_143" "OTU_169" "OTU_201" "OTU_202" "OTU_247" "OTU_254" # [12] "OTU_258" "OTU_402" "OTU_419" "OTU_531" "OTU_2384" "OTU_2934" "OTU_3671" "OTU_3795" "OTU_4034" "OTU_4077" "OTU_4702" # [23] "OTU_5971" "OTU_6689" "OTU_7119" "OTU_8231" ## Decreasing taxa # taxa.sig.diffabun.Dec.cont # taxa.sig.diffabun.Dec.low # taxa.sig.diffabun.Dec.high x = list( "Control" = taxa.sig.diffabun.Dec.cont, "Low" = taxa.sig.diffabun.Dec.low, "High" = taxa.sig.diffabun.Dec.high) overlap.dec <- calculate.overlap(x) as.numeric(lapply(x, FUN = length)) # 20 17 20 as.numeric(lapply(overlap.dec, FUN = length)) # 6 1 6 4 7 6 4 str(overlap.dec) # List of 7 # $ a5: chr [1:6] "OTU_16" "OTU_22" "OTU_127" "OTU_2084" ... # $ a2: chr "OTU_142" # $ a4: chr [1:6] "OTU_2" "OTU_2041" "OTU_2544" "OTU_2846" ... # $ a6: chr [1:4] "OTU_93" "OTU_195" "OTU_719" "OTU_4743" # $ a1: chr [1:7] "OTU_7" "OTU_45" "OTU_81" "OTU_112" ... # $ a3: chr [1:6] "OTU_42" "OTU_104" "OTU_2164" "OTU_3118" ... # $ a7: chr [1:4] "OTU_21" "OTU_24" "OTU_4086" "OTU_4681" # i.e. Control 20, Low 17, High 20 # Control&Low length(which(x[[1]] %in% x[[2]])) # 7 # Control&High length(which(x[[1]] %in% x[[3]])) # 12 # Low&High length(which(x[[2]] %in% x[[3]])) # 10 # Control&Low&High sel <- which(x[[1]] %in% x[[2]]) length(which(x[[1]][sel] %in% x[[3]])) # 6 ## use 'eulerr' package combo <- c("Control" = 20-(7-6)-(12-6)-6, "Low" = 21-(7-6)-(10-6)-6, "High" = 20-(12-6)-(10-6)-6, "Control&Low" = 7-6, "Control&High" = 12-6, "Low&High" = 10-6, "Control&Low&High" = 6) fit.circ.dec <- euler(combo) fit.circ.dec plot(fit.circ.dec) cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue plot(fit.circ.dec, quantities = TRUE, fills = list(fill = cols, alpha = 0.3) ) # https://stat.ethz.ch/R-manual/R-devel/library/grid/html/unit.html grid.text(label = "B", x = unit(0.05, "npc") , y = unit(0.95,"npc"), gp=gpar(fontsize=16, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Euler-diagram-Decreasing-diff-abun-taxa-by-Treatments-decontam-vFINAL-B.tif"), width = 6, height = 10, units = "cm", res=600, compression="lzw") ### Heatmap visualisation of the significantly increasing taxa in Week 7 fecal samples ## use previously cleaned phyloseq object for Week 7 fecal samples fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 645 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 645 taxa by 7 taxonomic ranks ] ## isolate only significant differentially abundant increasing taxa from each Treatment phy_obj <- fecalw7.dc.16s ## Control # subselect Week 7 phy_obj.cont.w7 <- prune_samples( phy_obj@sam_data$Treatment == "Control", phy_obj ) # subselect taxa that are significantly differentially abundant and increasing phy_obj.cont.w7.Inc <- prune_taxa( phy_obj.cont.w7, taxa = taxa.sig.diffabun.Inc.cont ) ## Low # subselect Week 7 phy_obj.low.w7 <- prune_samples( phy_obj@sam_data$Treatment == "Low", phy_obj ) # subselect taxa that are significantly differentially abundant and increasing phy_obj.low.w7.Inc <- prune_taxa( phy_obj.low.w7, taxa = taxa.sig.diffabun.Inc.low ) ## High # subselect Week 7 phy_obj.high.w7 <- prune_samples( phy_obj@sam_data$Treatment == "High", phy_obj ) # subselect taxa that are significantly differentially abundant and increasing phy_obj.high.w7.Inc <- prune_taxa( phy_obj.high.w7, taxa = taxa.sig.diffabun.Inc.high ) ## 2. Merge the filtered Week 7 samples merge.phy_obj.w7.Inc <- merge_phyloseq(phy_obj.cont.w7.Inc, phy_obj.low.w7.Inc) # step 1 merge.phy_obj.w7.Inc <- merge_phyloseq(merge.phy_obj.w7.Inc, phy_obj.high.w7.Inc) # step 2 min(sample_sums(merge.phy_obj.w7.Inc)) # 181 min(taxa_sums(merge.phy_obj.w7.Inc)) # 31 sort(sample_sums(merge.phy_obj.w7.Inc)) # L5m3T16 L2m3T16 L5m1T16 L4m2T16 L5m2T16 L2m1T16 L4m1T16 L1m3T16 L6m2T16 L2m2T16 L3m1T16 L1m1T16 C2m3T16 L4m3T16 L3m3T16 L6m1T16 # 181 223 283 311 348 407 453 571 728 744 747 767 771 909 959 1191 # L3m2T16 H3m2T16 L6m3T16 C1m1T16 H4m2T16 H6m1T16 C1m3T16 H5m1T16 H3m3T16 C2m1T16 H6m2T16 C1m2T16 H1m1T16 C3m3T16 H6m3T16 H2m1T16 # 1372 1438 1622 1891 3556 3563 3567 4103 4180 4229 5089 5120 5548 5750 6059 6497 # C2m2T16 H3m1T16 C4m2T16 H1m2T16 C3m2T16 C4m1T16 C3m1T16 C6m2T16 C5m3T16 H4m3T16 C5m1T16 C5m2T16 C4m3T16 H1m3T16 H4m1T16 H2m3T16 # 7616 7684 8028 8272 9219 9286 9553 11153 11520 14358 14952 15158 15381 15565 15814 16732 # H5m2T16 H2m2T16 H5m3T16 C6m1T16 C6m3T16 # 17812 18516 20252 23901 23937 ## Normalise by converting to relative abundance ... relabun.merge.phy_obj.w7.Inc <- transform_sample_counts(merge.phy_obj.w7.Inc, function(x) x / sum(x) ) relabun.merge.phy_obj.w7.Inc # phyloseq-class experiment-level object # otu_table() OTU Table: [ 93 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 93 taxa by 7 taxonomic ranks ] dim(relabun.merge.phy_obj.w7.Inc@otu_table) # 93 taxa x 53 samples mat0 <- as.matrix(as.data.frame(relabun.merge.phy_obj.w7.Inc@otu_table)) mat0 <- t(mat0) # now samples are rows, taxa are columns dim(mat0) # 53 93 ok <- complete.cases(mat0) sel <- which(ok==FALSE) # empty, i.e. all rows are complete, no NAs row.names(mat0) # [1] "C1m3T16" "C3m3T16" "C5m3T16" "C6m1T16" "C4m1T16" "C2m1T16" "C4m2T16" "C6m2T16" "C2m2T16" "C3m1T16" "C1m1T16" "C5m1T16" # [13] "C1m2T16" "C3m2T16" "C5m2T16" "C4m3T16" "C6m3T16" "C2m3T16" "L5m3T16" "L3m3T16" "L1m3T16" "L2m2T16" "L6m2T16" "L4m2T16" # [25] "L2m1T16" "L4m1T16" "L6m1T16" "L5m2T16" "L3m2T16" "L5m1T16" "L1m1T16" "L3m1T16" "L2m3T16" "L6m3T16" "L4m3T16" "H5m1T16" # [37] "H3m1T16" "H1m1T16" "H5m2T16" "H1m2T16" "H3m2T16" "H2m3T16" "H4m3T16" "H6m3T16" "H5m3T16" "H1m3T16" "H3m3T16" "H2m1T16" # [49] "H6m1T16" "H4m1T16" "H2m2T16" "H4m2T16" "H6m2T16" temp <- mat0 # convert for display purposes. Now all are week 7 (T16) so we can delete this row.names(mat0) <- gsub(pattern = "T16", replacement = "", x = row.names(mat0)) row.names(mat0) # [1] "C1m3" "C3m3" "C5m3" "C6m1" "C4m1" "C2m1" "C4m2" "C6m2" "C2m2" "C3m1" "C1m1" "C5m1" "C1m2" "C3m2" "C5m2" "C4m3" "C6m3" "C2m3" # [19] "L5m3" "L3m3" "L1m3" "L2m2" "L6m2" "L4m2" "L2m1" "L4m1" "L6m1" "L5m2" "L3m2" "L5m1" "L1m1" "L3m1" "L2m3" "L6m3" "L4m3" "H5m1" # [37] "H3m1" "H1m1" "H5m2" "H1m2" "H3m2" "H2m3" "H4m3" "H6m3" "H5m3" "H1m3" "H3m3" "H2m1" "H6m1" "H4m1" "H2m2" "H4m2" "H6m2" ## convert to z-scores mat0.zcol <- base::scale(mat0) # default scaling method scales the columns of a numeric matrix mat0[1:5,1:5] mat0.zcol[1:5,1:5] # scales within taxa (by column) mat0.zcol.zrow <- t( base::scale(t(mat0.zcol)) ) mat0.zcol.zrow[1:5, 1:5] # then scale within samples (by row) class(mat0.zcol.zrow) # matrix mat <- mat0.zcol.zrow heatmap.2(mat, trace="none",col=colorRampPalette(c("blue","white","red"))(20), margins = c(5,5)) getwd() # "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling" heatmap.2(mat, trace = "none", dendrogram = "both", col=colorRampPalette(c("red","black","green"))(20), rowsep=c(17,35), margins = c(2,5), # cexRow = 1, key.title = "Z-score", keysize = 1.5, xlab = "OTU relative abundance (z-score)", labCol = "", RowSideColors=c( # Control ="#f46d43", # orange # Low ="#66c2a5", # green-blue # High ="#5e4fa2", dark-blue "C1m3"="#f46d43", "C3m3"="#f46d43", "C5m3"="#f46d43", "C6m1"="#f46d43", "C4m1"="#f46d43", "C2m1"="#f46d43", "C4m2"="#f46d43", "C6m2"="#f46d43", "C2m2"="#f46d43", "C3m1"="#f46d43", "C1m1"="#f46d43", "C5m1"="#f46d43", "C1m2"="#f46d43", "C3m2"="#f46d43", "C5m2"="#f46d43", "C4m3"="#f46d43", "C6m3"="#f46d43", "C2m3"="#f46d43", "L5m3"="#66c2a5", "L3m3"="#66c2a5", "L1m3"="#66c2a5", "L2m2"="#66c2a5", "L6m2"="#66c2a5", "L4m2"="#66c2a5", "L2m1"="#66c2a5", "L4m1"="#66c2a5", "L6m1"="#66c2a5", "L5m2"="#66c2a5", "L3m2"="#66c2a5", "L5m1"="#66c2a5", "L1m1"="#66c2a5", "L3m1"="#66c2a5", "L2m3"="#66c2a5", "L6m3"="#66c2a5", "L4m3"="#66c2a5", "H5m1"="#5e4fa2", "H3m1"="#5e4fa2", "H1m1"="#5e4fa2", "H5m2"="#5e4fa2", "H1m2"="#5e4fa2", "H3m2"="#5e4fa2", "H2m3"="#5e4fa2", "H4m3"="#5e4fa2", "H6m3"="#5e4fa2", "H5m3"="#5e4fa2", "H1m3"="#5e4fa2", "H3m3"="#5e4fa2", "H2m1"="#5e4fa2", "H6m1"="#5e4fa2", "H4m1"="#5e4fa2", "H2m2"="#5e4fa2", "H4m2"="#5e4fa2", "H6m2"="#5e4fa2" ) ) legend(x = 0.2, y = 1.07, xpd = TRUE, # top left legend = c( "Control", "Low", "High" ), col = c( "Control" ="#f46d43", # orange "Low" = "#66c2a5", # green-blue "High" ="#5e4fa2" # dark-blue ), lty= 1, lwd = 5, cex=.6 , xjust = 0, # how the legend is to be justified relative to the legend x location. A value of 0 means left justified, 0.5 means centered and 1 means right justified yjust = 0 # how the legend is to be justified relative to the legend x location. A value of 0 means left justified, 0.5 means centered and 1 means right justified ) dev.print(tiff, file=paste0("plots/","Heatmap-Sig-DiffAbun-Increasing-taxa-zcol-zrow-both-dendograms-vFINAL.tif"), width = 18.3, height = 16, units = "cm", res = 600, compression = "lzw") #------------------------ #### Get closest NCBI match / putative species #------------------------ # https://rdrr.io/github/mhahsler/rBLAST/man/BLAST.html # Accessed 2019-01-25 # ftp://ftp.ncbi.nlm.nih.gov/blast/db # ftp://ftp.ncbi.nlm.nih.gov/blast/db/README # DATABASE COPY # ftp://ftp.ncbi.nlm.nih.gov/blast/db/16SMicrobial.tar.gz (Dated 20/01/2019) # EXECUTABLE COPY # Install software from: # ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast-2.8.1+-win64.exe (Dated 27/11/2018) # call using -db 16SMicrobial # Record Expectation (E) value - use threshold of 10e-10 to denote significant matches (Pearson 2013) ## fasta sequences are available from seqs_df <- read_excel(path= paste0(datadir,"/","OTU-rep-sequences.xlsx"), sheet=1, range="B1:C10485", col_names = TRUE) seqs_df <- as.data.frame(seqs_df) str(seqs_df) ## Find interpretable putative species assignment from NCBI bacterial 16S database names(seqs_df) # "subject" "OTU-seq" head(seqs_df[ ,c("subject","OTU-seq")]) # subject # 1 OTU_1 # 2 OTU_2 # 3 OTU_3 # 4 OTU_4 # 5 OTU_5 # 6 OTU_6 # OTU-seq # 1 TGGGGGATATTGCACAATGGGGGGAACCCTGATGCAGCGACGCCGCGTGGGCGATGGAGTGCTTCGGCATGTAAAGCCCTATCAGCAGGGAAGAAAATGACGGTACCTGACTAAGAAGCCCCGGCTAAATACGTGCCAGCAGCCGCGGTAATACGTATGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCCTGGCAAGCCTGATGTGAAATACCGGGGCCCAACCCCGGGGCTGCATTGGGAACTGCCAGGCTGGAGTGCCGGAGAGGCAGGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGACGGTGACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGG # 2 TAGGGAATCTTCCACAATGGGCGAAAGCCTGATGGAGCAACGCCGCGTGGGTGAAGAAGGTCTTCGGATCGTAAAACCCTGTTGTTAGAGAAGAAAGTGCGTGAGAGTAACTGTTCACGTTTCGACGGTATCTAACCAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGATTTATTGGGCGTAAAGGGAACGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCTTCGGCTTAACCGGAGTAGTGCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAAGAACACCAGTGGCGAAAGCGGCTCTCTGGTCTGTAACTGACGCTGAGGTTCGAAAGCGTGGGTAGCAAACAGG # 3 TGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAGTGAAGAAGTATTTCGGTATGTAAAGCTCTATCAGCAGGGAAGAAAATGACGGTACCTGATTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCAGCGCAAGTCTGAAGTGAAATGCCGGGGCTTAACCCCGGAACTGCTTTGGAAACTGTGCAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGG # 4 TGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAGTGAAGAAGTATTTCGGTATGTAAAGCTCTATCAGCAGGGAAGAAATAGCGACCTTTGGGTCGCCAGACGGTACCTGATTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCAGCGCAAGTCTGAAGTGAAATGCCGGGGCTTAACCCCGGAACTGCTTTGGAAACTGTGCAGCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGG # 5 TGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAGTGAAGAAGTATTTCGGTATGTAAAGCTCTATCAGCAGGGAAGAAAAAAGACGGTACCTGACTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGCGGCAAGTCTGAAGTGAAAGGCAGGGGCTTAACCCCTGAACTGCTTTGGAAACTGCCATGCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACAGTAACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGG # 6 TAGGGAATCTTCCACAATGGACGCAAGTCTGATGGAGCAACGCCGCGTGAGTGAAGAAGGGTTTCGGCTCGTAAAGCTCTGTTGGTAGTGAAGAAAGATAGAGGTAGTAACTGGCCTTTATTTGACGGTAATTACCTAGAAAGTCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGG ## copied from above x = list( "Control" = taxa.sig.diffabun.Inc.cont, "Low" = taxa.sig.diffabun.Inc.low, "High" = taxa.sig.diffabun.Inc.high) overlap.inc <- calculate.overlap(x) ## taxa that are only increasing in High: overlap.inc$a7 # [1] "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" "OTU_38" "OTU_60" "OTU_79" "OTU_82" # [10] "OTU_91" "OTU_105" "OTU_109" "OTU_115" "OTU_139" "OTU_173" "OTU_196" "OTU_556" "OTU_1737" # [19] "OTU_2750" "OTU_3408" "OTU_3456" "OTU_3669" "OTU_3876" "OTU_5375" "OTU_5520" "OTU_5765" "OTU_5790" # [28] "OTU_5915" "OTU_6509" "OTU_7377" "OTU_9265" sel <- which( !(x[[3]] %in% x[[1]]) & !(x[[3]] %in% x[[2]])) x[[3]][sel] # [1] "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" "OTU_38" "OTU_60" "OTU_79" "OTU_82" # [10] "OTU_91" "OTU_105" "OTU_109" "OTU_115" "OTU_139" "OTU_173" "OTU_196" "OTU_556" "OTU_1737" # [19] "OTU_2750" "OTU_3408" "OTU_3456" "OTU_3669" "OTU_3876" "OTU_5375" "OTU_5520" "OTU_5765" "OTU_5790" # [28] "OTU_5915" "OTU_6509" "OTU_7377" "OTU_9265" taxa.sig.ONLY.High <- overlap.inc$a7 ## taxa that are only increasing in Low: sel <- which( !(x[[2]] %in% x[[1]]) & !(x[[2]] %in% x[[3]])) x[[2]][sel] # "OTU_240" "OTU_332" "OTU_5095" "OTU_8615" "OTU_9639" overlap.inc$a3 # "OTU_240" "OTU_332" "OTU_5095" "OTU_8615" "OTU_9639" taxa.sig.ONLY.Low <- overlap.inc$a3 ## taxa that are only increasing in Control: sel <- which( !(x[[1]] %in% x[[2]]) & !(x[[1]] %in% x[[3]])) x[[1]][sel] # [1] "OTU_19" "OTU_57" "OTU_67" "OTU_130" "OTU_133" "OTU_143" "OTU_169" "OTU_201" "OTU_202" # [10] "OTU_247" "OTU_254" "OTU_258" "OTU_402" "OTU_419" "OTU_531" "OTU_2384" "OTU_2934" "OTU_3671" # [19] "OTU_3795" "OTU_4034" "OTU_4077" "OTU_4702" "OTU_5971" "OTU_6689" "OTU_7119" "OTU_8231" overlap.inc$a1 # [1] "OTU_19" "OTU_57" "OTU_67" "OTU_130" "OTU_133" "OTU_143" "OTU_169" "OTU_201" "OTU_202" # [10] "OTU_247" "OTU_254" "OTU_258" "OTU_402" "OTU_419" "OTU_531" "OTU_2384" "OTU_2934" "OTU_3671" # [19] "OTU_3795" "OTU_4034" "OTU_4077" "OTU_4702" "OTU_5971" "OTU_6689" "OTU_7119" "OTU_8231" taxa.sig.ONLY.Control <- overlap.inc$a1 ## load a BLAST database (replace db with the location + name of the BLAST DB) # downloaded db from: ftp://ftp.ncbi.nlm.nih.gov/blast/db/16SMicrobial.tar.gz (Dated 20/01/2019) untar("C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobial.tar.gz", exdir="C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB") # blastn.exe is here: "C:/Program Files/NCBI/blast-2.8.1+/bin" Sys.getenv("PATH") # includes: C:\\Program Files\\NCBI\\blast-2.8.1+\\bin # [1] "C:\\R\\R-3.5.1\\bin\\x64;C:\\Program Files (x86)\\Common Files\\Oracle\\Java\\javapath;C:\\Program Files\\ImageMagick-7.0.6-Q16;C:\\ProgramData\\Oracle\\Java\\javapath;C:\\WINDOWS\\system32;C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\;C:\\Program Files\\PuTTY\\;C:\\ArcGIS\\EsriProductionMapping\\Desktop10.3\\Bin;C:\\ArcGIS\\Bin;C:\\Program Files (x86)\\Sennheiser\\SoftphoneSDK\\;C:\\Program Files\\NCBI\\blast-2.8.1+\\bin;C:\\Users\\a1026871\\AppData\\Local\\Microsoft\\WindowsApps;C:\\Users\\a1026871\\AppData\\Local\\Pandoc\\;C:\\Program Files\\snap\\bin;C:\\Python27;C:\\Sen2Cor-02.05.05-win64" Sys.which("blastn") # blastn # "C:\\Program Files\\NCBI\\BLAST-~1.1_\\bin\\blastn.exe" system(command = "blastdbcmd -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -info") # Database: 16S Microbial Sequences # 20,792 sequences; 30,234,980 total bases # Date: Jan 20, 2019 12:00 AM Longest sequence: 3,600 bases # BLASTDB Version: 4 # Volumes: # C:\Workspace\DATA\NCBI_BLAST_16S\16SMicrobialDB\16SMicrobial ## set up text_query.txt and adapt this! # system(command = "blastn -query text_query.txt -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -out output.txt") ## assign closest NCBI match to significantly increasing taxa # store temporary blast results here blast_dir <- "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling/blast_outputs_temp" ### HIGH # clear previous results #unlink(file.path(blast_dir) , recursive=TRUE) # store results in dataframe. Some fields as placeholders to be filled later #df_blast <- data.frame(otu=taxa.sig.ONLY.High, score_bits=NA, expect=NA, pident=NA, closest_ncbi_match=NA ) df_blast <- data.frame(otu=taxa.sig.ONLY.High, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.ONLY.High) = 31 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) # blast output formats here: https://www.ncbi.nlm.nih.gov/books/NBK279684/ this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.High <- df_blast df_blast.High[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 1 OTU_26 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 466 NA 87 # 2 OTU_29 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 562 NA 91 # 3 OTU_31 NR_134772.1 Murimonas intestini strain SRB-530-5-H 16S riboso 647 NA 96 # 4 OTU_35 NR_041364.1 Prevotella stercorea strain CB35 16S ribosomal RN 584 NA 92 # 5 OTU_37 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 521 NA 97 # # 6 OTU_38 NR_117905.1 Acetatifactor muris strain CT-m2 16S ribosomal RN 630 NA 95 # 7 OTU_60 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 658 NA 96 # # 8 OTU_79 NR_113117.1 Prevotella oralis strain JCM 12251 16S ribosomal 545 NA 90 # 9 OTU_82 NR_116938.1 Olsenella profusa strain DSM 13989 16S ribosomal 675 NA 96 # 10 OTU_91 NR_113271.1 Alistipes indistinctus strain JCM 16068 16S ribos 662 NA 95 # 11 OTU_105 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 529 NA 89 # 12 OTU_109 NR_026493.1 [Clostridium] indolis strain 7 16S ribosomal RNA 558 NA 91 # 13 OTU_115 NR_144748.1 Phocea massiliensis strain Marseille-P2769 16S ri 573 NA 92 # 14 OTU_139 NR_027558.1 Anaerotruncus colihominis strain WAL 14565 16S ri 651 NA 96 # 15 OTU_173 NR_118554.1 Intestinimonas butyriciproducens strain SRB-521-5 592 NA 93 # 16 OTU_196 NR_115340.1 Saccharofermentans acetigenes strain P6 16S ribos 414 NA 85 # 17 OTU_556 NR_118156.1 Oscillibacter ruminantium strain GH1 16S ribosoma 612 NA 94 # 18 OTU_1737 NR_117905.1 Acetatifactor muris strain CT-m2 16S ribosomal RN 614 NA 94 # 19 OTU_2750 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 556 NA 90 # 20 OTU_3408 NR_115711.1 Clostridium cavendishii strain BL-28 16S ribosoma 411 NA 85 # 21 OTU_3456 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 540 NA 90 # 22 OTU_3669 NR_144731.1 Eisenbergiella massiliensis strain AT11 16S ribos 586 NA 92 # 23 OTU_3876 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 562 NA 91 # 24 OTU_5375 NR_036800.1 [Ruminococcus] gnavus strain ATCC 29149 16S ribos 564 NA 92 # 25 OTU_5520 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 658 NA 96 # # 26 OTU_5765 NR_104899.1 Bacteroides xylanolyticus strain X5-1 16S ribosom 569 NA 92 # 27 OTU_5790 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 483 NA 95 # # 28 OTU_5915 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 584 NA 92 # 29 OTU_6509 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 505 NA 88 # 30 OTU_7377 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 505 NA 88 # 31 OTU_9265 NR_144616.1 Muribaculum intestinale strain YL27 16S ribosomal 488 NA 88 write.csv(df_blast.High, file = "blast-results-High-vFINAL1.csv", row.names = FALSE) ### LOW # clear previous results - OR move to storage folder #unlink(file.path(blast_dir) , recursive=TRUE) # store results in dataframe. Some fields as placeholders to be filled later df_blast <- data.frame(otu=taxa.sig.ONLY.Low, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.ONLY.Low) = 5 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.Low <- df_blast df_blast.Low[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 1 OTU_240 NR_144608.1 Cuneatibacter caecimuris strain BARN-424-CC-10 16 654 NA 96 # 2 OTU_332 NR_144611.1 Flintibacter butyricus strain BLS21 16S ribosomal 573 NA 92 # 3 OTU_5095 NR_147370.1 Pseudoflavonifractor phocaeensis strain Marseille 680 NA 97 # 4 OTU_8615 NR_144750.1 Prevotellamassilia timonensis strain Marseille-P2 457 NA 86 # 5 OTU_9639 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 603 NA 94 write.csv(df_blast.Low, file = "blast-results-Low-vFINAL1.csv", row.names = FALSE) ### CONTROL # clear previous results - OR move to storage folder #unlink(file.path(blast_dir) , recursive=TRUE) # store results in dataframe. Some fields as placeholders to be filled later df_blast <- data.frame(otu=taxa.sig.ONLY.Control, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.ONLY.Control) = 26 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) # blast output formats here: https://www.ncbi.nlm.nih.gov/books/NBK279684/ this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.Cont <- df_blast df_blast.Cont[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 1 OTU_19 NR_074515.1 Bacteroides vulgatus strain ATCC 8482 16S ribosom 784 NA 0 # 2 OTU_57 NR_044648.2 [Eubacterium] tortuosum strain ATCC 25548 16S rib 586 NA 91 # 3 OTU_67 NR_074793.2 Oscillibacter valericigenes strain Sjm18-20 16S r 645 NA 95 # 4 OTU_130 NR_144749.1 Ihubacter massiliensis strain Marseille-P2843 16S 693 NA 98 # 5 OTU_133 NR_119085.1 [Clostridium] polysaccharolyticum strain DSM 1801 634 NA 95 # 6 OTU_143 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 630 NA 95 # 7 OTU_169 NR_025162.1 Tindallia californiensis strain APO 16S ribosomal 259 NA 79 # 8 OTU_201 NR_028911.1 Anaerovorax odorimutans strain NorPut 16S ribosom 568 NA 92 # 9 OTU_202 NR_042825.1 Paraeggerthella hongkongensis strain HKU10 16S ri 580 NA 93 # 10 OTU_247 NR_042007.1 Roseburia inulinivorans strain A2-194 16S ribosom 593 NA 93 # 11 OTU_254 NR_114789.1 [Clostridium] leptum strain DSM 753 16S ribosomal 619 NA 94 # 12 OTU_258 NR_074629.1 Ruminiclostridium thermocellum strain ATCC 27405 520 NA 90 # 13 OTU_402 NR_148620.1 Lachnotalea glycerini strain : DLD10 16S ribosoma 658 NA 96 # 14 OTU_419 NR_117905.1 Acetatifactor muris strain CT-m2 16S ribosomal RN 597 NA 93 # 15 OTU_531 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 686 NA 97 # 16 OTU_2384 NR_026493.1 [Clostridium] indolis strain 7 16S ribosomal RNA 614 NA 94 # 17 OTU_2934 NR_036800.1 [Ruminococcus] gnavus strain ATCC 29149 16S ribos 608 NA 94 # 18 OTU_3671 NR_116747.1 Ruminococcus faecis strain Eg2 16S ribosomal RNA 542 NA 91 # 19 OTU_3795 NR_074793.2 Oscillibacter valericigenes strain Sjm18-20 16S r 590 NA 93 # 20 OTU_4034 NR_156907.1 Faecalimonas umbilicata strain EGH7 16S ribosomal 603 NA 94 # 21 OTU_4077 NR_026493.1 [Clostridium] indolis strain 7 16S ribosomal RNA 592 NA 93 # 22 OTU_4702 NR_152688.1 Falcatimonas natans strain WN011 16S ribosomal RN 597 NA 93 # 23 OTU_5971 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 569 NA 92 # 24 OTU_6689 NR_036800.1 [Ruminococcus] gnavus strain ATCC 29149 16S ribos 619 NA 94 # 25 OTU_7119 NR_042152.1 Marvinbryantia formatexigens strain I-52 16S ribo 401 NA 85 # 26 OTU_8231 NR_028785.1 [Clostridium] scindens strain ATCC 35704 16S ribo 390 NA 91 write.csv(df_blast.Cont, file = "blast-results-Cont-vFINAL1.csv", row.names = FALSE) #------------------------ #### Add stats on % OTU Rel Abun Air, Soil, Fecal, Cecal #------------------------ ## add values for max % OTU Rel Abun in soil, Week 7 fecal, cecal samples ## add to temp.df_blast <- list() temp.df_blast[["Cont"]] <- df_blast.Cont; temp.df_blast[["Low"]] <- df_blast.Low; temp.df_blast[["High"]] <- df_blast.High # # # # # # # # # # # # # # # Control # # # # # # # # # # # # # # ## Air - Control air.w1w6.Cont.dc.16s <- prune_samples( air.dc.16s@sam_data$Treatment == "Control", air.dc.16s ) min(sample_sums(air.w1w6.Cont.dc.16s)) # 1681 min(taxa_sums(air.w1w6.Cont.dc.16s)) # 0 # prune taxa that have zero sequence reads air.w1w6.Cont.dc.16s <- prune_taxa(taxa = taxa_sums(air.w1w6.Cont.dc.16s) > 0, x = air.w1w6.Cont.dc.16s) air.w1w6.Cont.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 504 taxa and 24 samples ] # sample_data() Sample Data: [ 24 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 504 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.air.w1w6.Cont.dc.16s <- transform_sample_counts(air.w1w6.Cont.dc.16s, function(x) 100*x / sum(x) ) relabun.air.w1w6.Cont.dc.16s ## No Control soils ## Fecal Week 0 - Control fecalw0.Cont.dc.16s <- prune_samples( fecalw0.dc.16s@sam_data$Treatment == "Control", fecalw0.dc.16s ) min(sample_sums(fecalw0.Cont.dc.16s)) # 20718 min(taxa_sums(fecalw0.Cont.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw0.Cont.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw0.Cont.dc.16s) > 0, x = fecalw0.Cont.dc.16s ) fecalw0.Cont.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 665 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 665 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw0.Cont.dc.16s <- transform_sample_counts(fecalw0.Cont.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw0.Cont.dc.16s ## Fecal Week 7 - Control fecalw7.Cont.dc.16s <- prune_samples( fecalw7.dc.16s@sam_data$Treatment == "Control", fecalw7.dc.16s ) min(sample_sums(fecalw7.Cont.dc.16s)) # 22492 min(taxa_sums(fecalw7.Cont.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw7.Cont.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw7.Cont.dc.16s) > 0, x = fecalw7.Cont.dc.16s ) fecalw7.Cont.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 566 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 566 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw7.Cont.dc.16s <- transform_sample_counts(fecalw7.Cont.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw7.Cont.dc.16s ## Cecal - Control cecal.Cont.dc.16s <- prune_samples( cecal.dc.16s@sam_data$Treatment == "Control", cecal.dc.16s ) min(sample_sums(cecal.Cont.dc.16s)) # 36868 min(taxa_sums(cecal.Cont.dc.16s)) # 0 # prune taxa that have zero sequence reads cecal.Cont.dc.16s <- prune_taxa( taxa = taxa_sums(cecal.Cont.dc.16s) > 0, x = cecal.Cont.dc.16s ) cecal.Cont.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 512 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 512 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.cecal.Cont.dc.16s <- transform_sample_counts(cecal.Cont.dc.16s, function(x) 100*x / sum(x) ) relabun.cecal.Cont.dc.16s # # # # # # # # # # # # # # # Low # # # # # # # # # # # # # # ## Air - Low air.w1w6.Low.dc.16s <- prune_samples( air.dc.16s@sam_data$Treatment == "Low", air.dc.16s ) min(sample_sums(air.w1w6.Low.dc.16s)) # 1307 min(taxa_sums(air.w1w6.Low.dc.16s)) # 0 # prune taxa that have zero sequence reads air.w1w6.Low.dc.16s <- prune_taxa(taxa = taxa_sums(air.w1w6.Low.dc.16s) > 0, x = air.w1w6.Low.dc.16s) air.w1w6.Low.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 1675 taxa and 24 samples ] # sample_data() Sample Data: [ 24 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 1675 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.air.w1w6.Low.dc.16s <- transform_sample_counts(air.w1w6.Low.dc.16s, function(x) 100*x / sum(x) ) relabun.air.w1w6.Low.dc.16s ## Soils - Low soil.Low.dc.16s <- prune_samples( soil.dc.16s@sam_data$Treatment == "Low", soil.dc.16s ) min(sample_sums(soil.Low.dc.16s)) # 15550 min(taxa_sums(soil.Low.dc.16s)) # 0 # prune taxa that have zero sequence reads soil.Low.dc.16s <- prune_taxa(taxa = taxa_sums(soil.Low.dc.16s) > 0, x = soil.Low.dc.16s) soil.Low.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2145 taxa and 33 samples ] # sample_data() Sample Data: [ 33 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2145 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.soil.Low.dc.16s <- transform_sample_counts(soil.Low.dc.16s, function(x) 100*x / sum(x) ) relabun.soil.Low.dc.16s taxa_in_soil.Low <- taxa_names(soil.Low.dc.16s) ## Fecal Week 0 - Low fecalw0.Low.dc.16s <- prune_samples( fecalw0.dc.16s@sam_data$Treatment == "Low", fecalw0.dc.16s ) min(sample_sums(fecalw0.Low.dc.16s)) # 23952 min(taxa_sums(fecalw0.Low.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw0.Low.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw0.Low.dc.16s) > 0, x = fecalw0.Low.dc.16s ) fecalw0.Low.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 571 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 571 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw0.Low.dc.16s <- transform_sample_counts(fecalw0.Low.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw0.Low.dc.16s ## Fecal Week 7 - Low fecalw7.Low.dc.16s <- prune_samples( fecalw7.dc.16s@sam_data$Treatment == "Low", fecalw7.dc.16s ) min(sample_sums(fecalw7.Low.dc.16s)) # 19385 min(taxa_sums(fecalw7.Low.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw7.Low.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw7.Low.dc.16s) > 0, x = fecalw7.Low.dc.16s ) fecalw7.Low.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 560 taxa and 17 samples ] # sample_data() Sample Data: [ 17 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 560 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw7.Low.dc.16s <- transform_sample_counts(fecalw7.Low.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw7.Low.dc.16s ## Cecal - Low cecal.Low.dc.16s <- prune_samples( cecal.dc.16s@sam_data$Treatment == "Low", cecal.dc.16s ) min(sample_sums(cecal.Low.dc.16s)) # 14632 min(taxa_sums(cecal.Low.dc.16s)) # 0 # prune taxa that have zero sequence reads cecal.Low.dc.16s <- prune_taxa( taxa = taxa_sums(cecal.Low.dc.16s) > 0, x = cecal.Low.dc.16s ) cecal.Low.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 489 taxa and 17 samples ] # sample_data() Sample Data: [ 17 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 489 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.cecal.Low.dc.16s <- transform_sample_counts(cecal.Low.dc.16s, function(x) 100*x / sum(x) ) relabun.cecal.Low.dc.16s # # # # # # # # # # # # # # # High # # # # # # # # # # # # # # ## Air - High air.w1w6.High.dc.16s <- prune_samples( air.dc.16s@sam_data$Treatment == "High", air.dc.16s ) min(sample_sums(air.w1w6.High.dc.16s)) # 1400 min(taxa_sums(air.w1w6.High.dc.16s)) # 0 # prune taxa that have zero sequence reads air.w1w6.High.dc.16s <- prune_taxa(taxa = taxa_sums(air.w1w6.High.dc.16s) > 0, x = air.w1w6.High.dc.16s) air.w1w6.High.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 1562 taxa and 24 samples ] # sample_data() Sample Data: [ 24 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 1562 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.air.w1w6.High.dc.16s <- transform_sample_counts(air.w1w6.High.dc.16s, function(x) 100*x / sum(x) ) relabun.air.w1w6.High.dc.16s ## Soils - High soil.High.dc.16s <- prune_samples( soil.dc.16s@sam_data$Treatment == "High", soil.dc.16s ) min(sample_sums(soil.High.dc.16s)) # 12524 min(taxa_sums(soil.High.dc.16s)) # 0 # prune taxa that have zero sequence reads soil.High.dc.16s <- prune_taxa(taxa = taxa_sums(soil.High.dc.16s) > 0, x = soil.High.dc.16s) soil.High.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2125 taxa and 32 samples ] # sample_data() Sample Data: [ 32 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2125 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.soil.High.dc.16s <- transform_sample_counts(soil.High.dc.16s, function(x) 100*x / sum(x) ) relabun.soil.High.dc.16s taxa_in_soil.High <- taxa_names(soil.High.dc.16s) ## Fecal Week 0 - High fecalw0.High.dc.16s <- prune_samples( fecalw0.dc.16s@sam_data$Treatment == "High", fecalw0.dc.16s ) min(sample_sums(fecalw0.High.dc.16s)) # 11450 min(taxa_sums(fecalw0.High.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw0.High.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw0.High.dc.16s) > 0, x = fecalw0.High.dc.16s ) fecalw0.High.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 562 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 562 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw0.High.dc.16s <- transform_sample_counts(fecalw0.High.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw0.High.dc.16s ## Fecal Week 7 - High fecalw7.High.dc.16s <- prune_samples( fecalw7.dc.16s@sam_data$Treatment == "High", fecalw7.dc.16s ) min(sample_sums(fecalw7.High.dc.16s)) # 14195 min(taxa_sums(fecalw7.High.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw7.High.dc.16s <- prune_taxa( taxa = taxa_sums(fecalw7.High.dc.16s) > 0, x = fecalw7.High.dc.16s ) fecalw7.High.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 573 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 573 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.fecalw7.High.dc.16s <- transform_sample_counts(fecalw7.High.dc.16s, function(x) 100*x / sum(x) ) relabun.fecalw7.High.dc.16s ## Cecal - High cecal.High.dc.16s <- prune_samples( cecal.dc.16s@sam_data$Treatment == "High", cecal.dc.16s ) min(sample_sums(cecal.High.dc.16s)) # 40098 min(taxa_sums(cecal.High.dc.16s)) # 0 # prune taxa that have zero sequence reads cecal.High.dc.16s <- prune_taxa( taxa = taxa_sums(cecal.High.dc.16s) > 0, x = cecal.High.dc.16s ) cecal.High.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 501 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 501 taxa by 7 taxonomic ranks ] # % Relative abundance relabun.cecal.High.dc.16s <- transform_sample_counts(cecal.High.dc.16s, function(x) 100*x / sum(x) ) relabun.cecal.High.dc.16s ## loop through OTUs in each dataframe to add % OTU Rel Abun for: soils, air, fecal Week 7, cecal ## capture % OTU Rel Abun as: median (range) ## Control #df_blast.Cont <- temp.df_blast[["Cont"]] df_blast.Cont$rel_abun_airw1w6_mean <-NA df_blast.Cont$rel_abun_airw1w6_min <-NA df_blast.Cont$rel_abun_airw1w6_max <-NA df_blast.Cont$rel_abun_airw1w6_n_nonzero <-NA df_blast.Cont$rel_abun_airw1w6_n <-NA df_blast.Cont$rel_abun_fecalw0_mean <-NA df_blast.Cont$rel_abun_fecalw0_min <-NA df_blast.Cont$rel_abun_fecalw0_max <-NA df_blast.Cont$rel_abun_fecalw0_n_nonzero <-NA df_blast.Cont$rel_abun_fecalw0_n <-NA df_blast.Cont$rel_abun_fecalw7_mean <-NA df_blast.Cont$rel_abun_fecalw7_min <-NA df_blast.Cont$rel_abun_fecalw7_max <-NA df_blast.Cont$rel_abun_fecalw7_n_nonzero <-NA df_blast.Cont$rel_abun_fecalw7_n <-NA df_blast.Cont$rel_abun_cecal_mean <-NA df_blast.Cont$rel_abun_cecal_min <-NA df_blast.Cont$rel_abun_cecal_max <-NA df_blast.Cont$rel_abun_cecal_n_nonzero <-NA df_blast.Cont$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.Cont$otu)) { #i<-1 this_otu <- as.character( df_blast.Cont$otu[i] ) sel <- which( row.names(clean.16s@tax_table) == this_otu) if (length(sel)==1){ df_blast.Cont$Phylum[i] <- sub(pattern="p__", replacement="", x=as.character(clean.16s@tax_table[sel, "Phylum"]) ) } sel <- which( row.names(relabun.air.w1w6.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.air.w1w6.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont$rel_abun_airw1w6_mean[i] <- mean( x ) # [subsel] df_blast.Cont$rel_abun_airw1w6_min[i] <- min( x ) df_blast.Cont$rel_abun_airw1w6_max[i] <- max( x ) df_blast.Cont$rel_abun_airw1w6_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont$rel_abun_airw1w6_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw0.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont$rel_abun_fecalw0_mean[i] <- mean( x ) # [subsel] df_blast.Cont$rel_abun_fecalw0_min[i] <- min( x ) df_blast.Cont$rel_abun_fecalw0_max[i] <- max( x ) df_blast.Cont$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont$rel_abun_fecalw7_mean[i] <- mean( x ) # [subsel] df_blast.Cont$rel_abun_fecalw7_min[i] <- min( x ) df_blast.Cont$rel_abun_fecalw7_max[i] <- max( x ) df_blast.Cont$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont$rel_abun_cecal_mean[i] <- mean( x ) # [subsel] df_blast.Cont$rel_abun_cecal_min[i] <- min( x ) df_blast.Cont$rel_abun_cecal_max[i] <- max( x ) df_blast.Cont$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } ## Low #df_blast.Low <- temp.df_blast[["Low"]] df_blast.Low$rel_abun_soil_mean <-NA df_blast.Low$rel_abun_soil_min <-NA df_blast.Low$rel_abun_soil_max <-NA df_blast.Low$rel_abun_soil_n_nonzero <-NA df_blast.Low$rel_abun_soil_n <-NA df_blast.Low$rel_abun_airw1w6_mean <-NA df_blast.Low$rel_abun_airw1w6_min <-NA df_blast.Low$rel_abun_airw1w6_max <-NA df_blast.Low$rel_abun_airw1w6_n_nonzero <-NA df_blast.Low$rel_abun_airw1w6_n <-NA df_blast.Low$rel_abun_fecalw0_mean <-NA df_blast.Low$rel_abun_fecalw0_min <-NA df_blast.Low$rel_abun_fecalw0_max <-NA df_blast.Low$rel_abun_fecalw0_n_nonzero <-NA df_blast.Low$rel_abun_fecalw0_n <-NA df_blast.Low$rel_abun_fecalw7_mean <-NA df_blast.Low$rel_abun_fecalw7_min <-NA df_blast.Low$rel_abun_fecalw7_max <-NA df_blast.Low$rel_abun_fecalw7_n_nonzero <-NA df_blast.Low$rel_abun_fecalw7_n <-NA df_blast.Low$rel_abun_cecal_mean <-NA df_blast.Low$rel_abun_cecal_min <-NA df_blast.Low$rel_abun_cecal_max <-NA df_blast.Low$rel_abun_cecal_n_nonzero <-NA df_blast.Low$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.Low$otu)) { #i<-1 this_otu <- as.character( df_blast.Low$otu[i] ) sel <- which( row.names(clean.16s@tax_table) == this_otu) if (length(sel)==1){ df_blast.Low$Phylum[i] <- sub(pattern="p__", replacement="", x=as.character(clean.16s@tax_table[sel, "Phylum"]) ) } sel <- which( row.names(relabun.soil.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.soil.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low$rel_abun_soil_mean[i] <- mean( x ) # [subsel] df_blast.Low$rel_abun_soil_min[i] <- min( x ) df_blast.Low$rel_abun_soil_max[i] <- max( x ) df_blast.Low$rel_abun_soil_n_nonzero[i] <- length( x[subsel] ) df_blast.Low$rel_abun_soil_n[i] <- length( x ) } sel <- which( row.names(relabun.air.w1w6.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.air.w1w6.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low$rel_abun_airw1w6_mean[i] <- mean( x ) # [subsel] df_blast.Low$rel_abun_airw1w6_min[i] <- min( x ) df_blast.Low$rel_abun_airw1w6_max[i] <- max( x ) df_blast.Low$rel_abun_airw1w6_n_nonzero[i] <- length( x[subsel] ) df_blast.Low$rel_abun_airw1w6_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw0.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low$rel_abun_fecalw0_mean[i] <- mean( x ) # [subsel] df_blast.Low$rel_abun_fecalw0_min[i] <- min( x ) df_blast.Low$rel_abun_fecalw0_max[i] <- max( x ) df_blast.Low$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.Low$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low$rel_abun_fecalw7_mean[i] <- mean( x ) # [subsel] df_blast.Low$rel_abun_fecalw7_min[i] <- min( x ) df_blast.Low$rel_abun_fecalw7_max[i] <- max( x ) df_blast.Low$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.Low$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low$rel_abun_cecal_mean[i] <- mean( x ) # [subsel] df_blast.Low$rel_abun_cecal_min[i] <- min( x ) df_blast.Low$rel_abun_cecal_max[i] <- max( x ) df_blast.Low$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.Low$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } ## High #df_blast.High <- temp.df_blast[["High"]] df_blast.High$rel_abun_soil_mean <-NA df_blast.High$rel_abun_soil_min <-NA df_blast.High$rel_abun_soil_max <-NA df_blast.High$rel_abun_soil_n_nonzero <-NA df_blast.High$rel_abun_soil_n <-NA df_blast.High$rel_abun_airw1w6_mean <-NA df_blast.High$rel_abun_airw1w6_min <-NA df_blast.High$rel_abun_airw1w6_max <-NA df_blast.High$rel_abun_airw1w6_n_nonzero <-NA df_blast.High$rel_abun_airw1w6_n <-NA df_blast.High$rel_abun_fecalw0_mean <-NA df_blast.High$rel_abun_fecalw0_min <-NA df_blast.High$rel_abun_fecalw0_max <-NA df_blast.High$rel_abun_fecalw0_n_nonzero <-NA df_blast.High$rel_abun_fecalw0_n <-NA df_blast.High$rel_abun_fecalw7_mean <-NA df_blast.High$rel_abun_fecalw7_min <-NA df_blast.High$rel_abun_fecalw7_max <-NA df_blast.High$rel_abun_fecalw7_n_nonzero <-NA df_blast.High$rel_abun_fecalw7_n <-NA df_blast.High$rel_abun_cecal_mean <-NA df_blast.High$rel_abun_cecal_min <-NA df_blast.High$rel_abun_cecal_max <-NA df_blast.High$rel_abun_cecal_n_nonzero <-NA df_blast.High$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.High$otu)) { #i<-1 this_otu <- as.character( df_blast.High$otu[i] ) sel <- which( row.names(clean.16s@tax_table) == this_otu) if (length(sel)==1){ df_blast.High$Phylum[i] <- sub(pattern="p__", replacement="", x=as.character(clean.16s@tax_table[sel, "Phylum"]) ) } sel <- which( row.names(relabun.soil.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.soil.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High$rel_abun_soil_mean[i] <- mean( x ) # [subsel] df_blast.High$rel_abun_soil_min[i] <- min( x ) df_blast.High$rel_abun_soil_max[i] <- max( x ) df_blast.High$rel_abun_soil_n_nonzero[i] <- length( x[subsel] ) df_blast.High$rel_abun_soil_n[i] <- length( x ) } sel <- which( row.names(relabun.air.w1w6.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.air.w1w6.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High$rel_abun_airw1w6_mean[i] <- mean( x ) # [subsel] df_blast.High$rel_abun_airw1w6_min[i] <- min( x ) df_blast.High$rel_abun_airw1w6_max[i] <- max( x ) df_blast.High$rel_abun_airw1w6_n_nonzero[i] <- length( x[subsel] ) df_blast.High$rel_abun_airw1w6_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw0.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High$rel_abun_fecalw0_mean[i] <- mean( x ) # [subsel] df_blast.High$rel_abun_fecalw0_min[i] <- min( x ) df_blast.High$rel_abun_fecalw0_max[i] <- max( x ) df_blast.High$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.High$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High$rel_abun_fecalw7_mean[i] <- mean( x ) # [subsel] df_blast.High$rel_abun_fecalw7_min[i] <- min( x ) df_blast.High$rel_abun_fecalw7_max[i] <- max( x ) df_blast.High$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.High$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High$rel_abun_cecal_mean[i] <- mean( x ) # [subsel] df_blast.High$rel_abun_cecal_min[i] <- min( x ) df_blast.High$rel_abun_cecal_max[i] <- max( x ) df_blast.High$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.High$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } ## Export tables of results ## Control names(df_blast.Cont) # [1] "otu" "AccessionNo" "closest_ncbi_match" "score_bits" # [5] "expect" "pident" "Phylum" "Description" # [9] "Ref" "rel_abun_airw1w6_mean" "rel_abun_airw1w6_min" "rel_abun_airw1w6_max" # [13] "rel_abun_airw1w6_n_nonzero" "rel_abun_airw1w6_n" "rel_abun_fecalw0_mean" "rel_abun_fecalw0_min" # [17] "rel_abun_fecalw0_max" "rel_abun_fecalw0_n_nonzero" "rel_abun_fecalw0_n" "rel_abun_fecalw7_mean" # [21] "rel_abun_fecalw7_min" "rel_abun_fecalw7_max" "rel_abun_fecalw7_n_nonzero" "rel_abun_fecalw7_n" # [25] "rel_abun_cecal_mean" "rel_abun_cecal_min" "rel_abun_cecal_max" "rel_abun_cecal_n_nonzero" # [29] "rel_abun_cecal_n" tab <- df_blast.Cont tab$air_mean_range <- paste0(round(tab$rel_abun_airw1w6_mean,4),"(",round(tab$rel_abun_airw1w6_min,4),"-",round(tab$rel_abun_airw1w6_max,4),")" ) tab$air_n_nz <- paste0(tab$rel_abun_airw1w6_n_nonzero," (",tab$rel_abun_airw1w6_n,")" ) tab$fecalw0_mean_range <- paste0(round(tab$rel_abun_fecalw0_mean,4),"(",round(tab$rel_abun_fecalw0_min,4),"-",round(tab$rel_abun_fecalw0_max,4),")" ) tab$fecalw0_n_nz <- paste0(tab$rel_abun_fecalw0_n_nonzero," (",tab$rel_abun_fecalw0_n,")" ) tab$fecalw7_mean_range <- paste0(round(tab$rel_abun_fecalw7_mean,4),"(",round(tab$rel_abun_fecalw7_min,4),"-",round(tab$rel_abun_fecalw7_max,4),")" ) tab$fecalw7_n_nz <- paste0(tab$rel_abun_fecalw7_n_nonzero," (",tab$rel_abun_fecalw7_n,")" ) tab$cecal_mean_range <- paste0(round(tab$rel_abun_cecal_mean,4),"(",round(tab$rel_abun_cecal_min,4),"-",round(tab$rel_abun_cecal_max,4),")" ) tab$cecal_n_nz <- paste0(tab$rel_abun_cecal_n_nonzero," (",tab$rel_abun_cecal_n,")" ) names(tab) keep <- c( "otu", "AccessionNo" , "closest_ncbi_match", "score_bits", "expect" , "pident" , "Phylum" , "Description" , "Ref" , "air_mean_range" , "air_n_nz" , "fecalw0_mean_range" , "fecalw0_n_nz" , "fecalw7_mean_range" , "fecalw7_n_nz" , "cecal_mean_range" , "cecal_n_nz" ) write.csv(tab[ ,keep], file="Table-results-SigDiffAbun-with-blast-Control-only-vFINAL1.csv", row.names = FALSE ) # note some columns will be blank and need to be filled by inspection of the blast output file, and citation for closest NCBI match, etc ## Low names(df_blast.Low) # [1] "otu" "AccessionNo" "closest_ncbi_match" "score_bits" # [5] "expect" "pident" "Phylum" "Description" # [9] "Ref" "rel_abun_soil_mean" "rel_abun_soil_min" "rel_abun_soil_max" # [13] "rel_abun_soil_n_nonzero" "rel_abun_soil_n" "rel_abun_airw1w6_mean" "rel_abun_airw1w6_min" # [17] "rel_abun_airw1w6_max" "rel_abun_airw1w6_n_nonzero" "rel_abun_airw1w6_n" "rel_abun_fecalw0_mean" # [21] "rel_abun_fecalw0_min" "rel_abun_fecalw0_max" "rel_abun_fecalw0_n_nonzero" "rel_abun_fecalw0_n" # [25] "rel_abun_fecalw7_mean" "rel_abun_fecalw7_min" "rel_abun_fecalw7_max" "rel_abun_fecalw7_n_nonzero" # [29] "rel_abun_fecalw7_n" "rel_abun_cecal_mean" "rel_abun_cecal_min" "rel_abun_cecal_max" # [33] "rel_abun_cecal_n_nonzero" "rel_abun_cecal_n" tab <- df_blast.Low tab$soil_mean_range <- paste0(round(tab$rel_abun_soil_mean,4),"(",round(tab$rel_abun_soil_min,4),"-",round(tab$rel_abun_soil_max,4),")" ) tab$soil_n_nz <- paste0(tab$rel_abun_soil_n_nonzero," (",tab$rel_abun_soil_n,")" ) tab$air_mean_range <- paste0(round(tab$rel_abun_airw1w6_mean,4),"(",round(tab$rel_abun_airw1w6_min,4),"-",round(tab$rel_abun_airw1w6_max,4),")" ) tab$air_n_nz <- paste0(tab$rel_abun_airw1w6_n_nonzero," (",tab$rel_abun_airw1w6_n,")" ) tab$fecalw0_mean_range <- paste0(round(tab$rel_abun_fecalw0_mean,4),"(",round(tab$rel_abun_fecalw0_min,4),"-",round(tab$rel_abun_fecalw0_max,4),")" ) tab$fecalw0_n_nz <- paste0(tab$rel_abun_fecalw0_n_nonzero," (",tab$rel_abun_fecalw0_n,")" ) tab$fecalw7_mean_range <- paste0(round(tab$rel_abun_fecalw7_mean,4),"(",round(tab$rel_abun_fecalw7_min,4),"-",round(tab$rel_abun_fecalw7_max,4),")" ) tab$fecalw7_n_nz <- paste0(tab$rel_abun_fecalw7_n_nonzero," (",tab$rel_abun_fecalw7_n,")" ) tab$cecal_mean_range <- paste0(round(tab$rel_abun_cecal_mean,4),"(",round(tab$rel_abun_cecal_min,4),"-",round(tab$rel_abun_cecal_max,4),")" ) tab$cecal_n_nz <- paste0(tab$rel_abun_cecal_n_nonzero," (",tab$rel_abun_cecal_n,")" ) names(tab) keep <- c( "otu", "AccessionNo" , "closest_ncbi_match", "score_bits", "expect" , "pident" , "Phylum" , "Description" , "Ref" , "soil_mean_range" , "soil_n_nz" , "air_mean_range" , "air_n_nz" , "fecalw0_mean_range" , "fecalw0_n_nz" , "fecalw7_mean_range" , "fecalw7_n_nz" , "cecal_mean_range" , "cecal_n_nz" ) write.csv(tab[ ,keep], file="Table-results-SigDiffAbun-with-blast-Low-only-vFINAL1.csv", row.names = FALSE ) # note some columns will be blank and need to be filled by inspection of the blast output file, and citation for closest NCBI match, etc ## High names(df_blast.High) # [1] "otu" "AccessionNo" "closest_ncbi_match" "score_bits" # [5] "expect" "pident" "Phylum" "Description" # [9] "Ref" "rel_abun_soil_mean" "rel_abun_soil_min" "rel_abun_soil_max" # [13] "rel_abun_soil_n_nonzero" "rel_abun_soil_n" "rel_abun_airw1w6_mean" "rel_abun_airw1w6_min" # [17] "rel_abun_airw1w6_max" "rel_abun_airw1w6_n_nonzero" "rel_abun_airw1w6_n" "rel_abun_fecalw0_mean" # [21] "rel_abun_fecalw0_min" "rel_abun_fecalw0_max" "rel_abun_fecalw0_n_nonzero" "rel_abun_fecalw0_n" # [25] "rel_abun_fecalw7_mean" "rel_abun_fecalw7_min" "rel_abun_fecalw7_max" "rel_abun_fecalw7_n_nonzero" # [29] "rel_abun_fecalw7_n" "rel_abun_cecal_mean" "rel_abun_cecal_min" "rel_abun_cecal_max" # [33] "rel_abun_cecal_n_nonzero" "rel_abun_cecal_n" tab <- df_blast.High tab$soil_mean_range <- paste0(round(tab$rel_abun_soil_mean,4),"(",round(tab$rel_abun_soil_min,4),"-",round(tab$rel_abun_soil_max,4),")" ) tab$soil_n_nz <- paste0(tab$rel_abun_soil_n_nonzero," (",tab$rel_abun_soil_n,")" ) tab$air_mean_range <- paste0(round(tab$rel_abun_airw1w6_mean,4),"(",round(tab$rel_abun_airw1w6_min,4),"-",round(tab$rel_abun_airw1w6_max,4),")" ) tab$air_n_nz <- paste0(tab$rel_abun_airw1w6_n_nonzero," (",tab$rel_abun_airw1w6_n,")" ) tab$fecalw0_mean_range <- paste0(round(tab$rel_abun_fecalw0_mean,4),"(",round(tab$rel_abun_fecalw0_min,4),"-",round(tab$rel_abun_fecalw0_max,4),")" ) tab$fecalw0_n_nz <- paste0(tab$rel_abun_fecalw0_n_nonzero," (",tab$rel_abun_fecalw0_n,")" ) tab$fecalw7_mean_range <- paste0(round(tab$rel_abun_fecalw7_mean,4),"(",round(tab$rel_abun_fecalw7_min,4),"-",round(tab$rel_abun_fecalw7_max,4),")" ) tab$fecalw7_n_nz <- paste0(tab$rel_abun_fecalw7_n_nonzero," (",tab$rel_abun_fecalw7_n,")" ) tab$cecal_mean_range <- paste0(round(tab$rel_abun_cecal_mean,4),"(",round(tab$rel_abun_cecal_min,4),"-",round(tab$rel_abun_cecal_max,4),")" ) tab$cecal_n_nz <- paste0(tab$rel_abun_cecal_n_nonzero," (",tab$rel_abun_cecal_n,")" ) names(tab) keep <- c( "otu", "AccessionNo" , "closest_ncbi_match", "score_bits", "expect" , "pident" , "Phylum" , "Description" , "Ref" , "soil_mean_range" , "soil_n_nz" , "air_mean_range" , "air_n_nz" , "fecalw0_mean_range" , "fecalw0_n_nz" , "fecalw7_mean_range" , "fecalw7_n_nz" , "cecal_mean_range" , "cecal_n_nz" ) write.csv(tab[ ,keep], file="Table-results-SigDiffAbun-with-blast-High-only-vFINAL1.csv", row.names = FALSE ) # note some columns will be blank and need to be filled by inspection of the blast output file, and citation for closest NCBI match, etc #------------------------ #### Comparison of putative butyrate-producer K. alysoides between treatments - incl faecal w0, w7, caecal #------------------------ ### Analyze all K.alysoides-like bacteria that increase across treatments ### collate all OTUs that increase from week 0 to week 7 fecal samples taxa.sig.diffabun.Inc.cont # [1] "OTU_1" "OTU_15" "OTU_19" "OTU_23" "OTU_46" "OTU_57" "OTU_59" "OTU_64" # [9] "OTU_67" "OTU_75" "OTU_101" "OTU_125" "OTU_130" "OTU_133" "OTU_143" "OTU_169" # [17] "OTU_201" "OTU_202" "OTU_247" "OTU_254" "OTU_258" "OTU_273" "OTU_402" "OTU_419" # [25] "OTU_481" "OTU_531" "OTU_616" "OTU_765" "OTU_2039" "OTU_2280" "OTU_2384" "OTU_2419" # [33] "OTU_2760" "OTU_2934" "OTU_2998" "OTU_3004" "OTU_3188" "OTU_3671" "OTU_3795" "OTU_4034" # [41] "OTU_4077" "OTU_4273" "OTU_4387" "OTU_4483" "OTU_4702" "OTU_5280" "OTU_5971" "OTU_6190" # [49] "OTU_6689" "OTU_7119" "OTU_8231" "OTU_9505" "OTU_9762" "OTU_9785" "OTU_10092" taxa.sig.diffabun.Inc.low # [1] "OTU_23" "OTU_59" "OTU_101" "OTU_240" "OTU_273" "OTU_287" "OTU_332" "OTU_765" # [9] "OTU_2039" "OTU_2280" "OTU_2998" "OTU_4273" "OTU_5079" "OTU_5095" "OTU_5280" "OTU_8615" # [17] "OTU_9639" taxa.sig.diffabun.Inc.high # [1] "OTU_1" "OTU_15" "OTU_23" "OTU_26" "OTU_29" "OTU_31" "OTU_35" "OTU_37" # [9] "OTU_38" "OTU_46" "OTU_60" "OTU_64" "OTU_75" "OTU_79" "OTU_82" "OTU_91" # [17] "OTU_101" "OTU_105" "OTU_109" "OTU_115" "OTU_125" "OTU_139" "OTU_173" "OTU_196" # [25] "OTU_287" "OTU_481" "OTU_556" "OTU_616" "OTU_765" "OTU_1737" "OTU_2039" "OTU_2280" # [33] "OTU_2419" "OTU_2750" "OTU_2760" "OTU_2998" "OTU_3004" "OTU_3188" "OTU_3408" "OTU_3456" # [41] "OTU_3669" "OTU_3876" "OTU_4273" "OTU_4387" "OTU_4483" "OTU_5079" "OTU_5375" "OTU_5520" # [49] "OTU_5765" "OTU_5790" "OTU_5915" "OTU_6190" "OTU_6509" "OTU_7377" "OTU_9265" "OTU_9505" # [57] "OTU_9762" "OTU_9785" "OTU_10092" length(taxa.sig.diffabun.Inc.cont);length(taxa.sig.diffabun.Inc.low);length(taxa.sig.diffabun.Inc.high) # [1] 55 # [1] 17 # [1] 59 ### assign closest NCBI match to significantly increasing taxa ## fasta sequences are available from seqs_df <- read_excel(path= paste0(datadir,"/","OTU-rep-sequences.xlsx"), sheet=1, range="B1:C10485", col_names = TRUE) seqs_df <- as.data.frame(seqs_df) str(seqs_df) # 'data.frame': 10484 obs. of 2 variables: # $ subject: chr "OTU_1" "OTU_2" "OTU_3" "OTU_4" ... # $ OTU-seq: chr "TGGGGGATATTGCACAATGGGG ## assign closest NCBI match to significantly increasing taxa # store temporary blast results here blast_dir <- "C:/Workspace/PROJ/PAPER-MICRO-MICE/modelling/blast_outputs_inc_all" ### HIGH df_blast <- data.frame(otu=taxa.sig.diffabun.Inc.high, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.diffabun.Inc.high) = 59 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) # blast output formats here: https://www.ncbi.nlm.nih.gov/books/NBK279684/ this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.High.all_KA <- df_blast sel <- grep(pattern = "Kineothrix alysoides", x = df_blast.High.all_KA$closest_ncbi_match) df_blast.High.all_KA <- df_blast.High.all_KA[sel, ] df_blast.High.all_KA[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 8 OTU_37 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 521 NA 97 # 11 OTU_60 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 658 NA 96 # 26 OTU_481 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 614 NA 94 # 48 OTU_5520 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 658 NA 96 # 50 OTU_5790 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 483 NA 95 # 56 OTU_9505 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 652 NA 96 ### LOW # clear previous results - OR move to storage folder #unlink(file.path(blast_dir) , recursive=TRUE) # store results in dataframe. Some fields as placeholders to be filled later df_blast <- data.frame(otu=taxa.sig.diffabun.Inc.low, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.diffabun.Inc.low) = 17 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.Low.all_KA <- df_blast sel <- grep(pattern = "Kineothrix alysoides", x = df_blast.Low.all_KA$closest_ncbi_match) df_blast.Low.all_KA <- df_blast.Low.all_KA[sel, ] df_blast.Low.all_KA[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 17 OTU_9639 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 603 NA 94 ### CONTROL # clear previous results - OR move to storage folder #unlink(file.path(blast_dir) , recursive=TRUE) # store results in dataframe. Some fields as placeholders to be filled later df_blast <- data.frame(otu=taxa.sig.diffabun.Inc.cont, AccessionNo=NA, closest_ncbi_match=NA, score_bits=NA, expect=NA, pident=NA, Phylum=NA, Description=NA, Ref=NA ) for (i in 1:length(df_blast$otu)) { # length(taxa.sig.diffabun.Inc.cont) = 55 #i<-1 sel <- which(seqs_df$subject == df_blast$otu[i]) rep_seq <- seqs_df$`OTU-seq`[sel] fileConn <- file("query_seq.txt") writeLines(text = rep_seq, fileConn) close(fileConn) # blast output formats here: https://www.ncbi.nlm.nih.gov/books/NBK279684/ this_command <- paste0('blastn -db C:/Workspace/DATA/NCBI_BLAST_16S/16SMicrobialDB/16SMicrobial -query query_seq.txt -max_target_seqs 1 -out ',blast_dir,'/output_',i,'.txt') system(command = this_command) if (file.exists(paste0(blast_dir,'/output_',i,'.txt'))) { read_out <- read_file(file = paste0(blast_dir,'/output_',i,'.txt')) # Score idx.before <- str_locate(string = read_out, pattern = "Score = ") idx.after <- str_locate(string = read_out, pattern = " bits ") df_blast$score_bits[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.after[1,"start"])-1) ) # Expect - this is unreliable to extract by code # idx.before <- str_locate(string = read_out, pattern = ", Expect = ") # df_blast$expect[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.before[1,"end"])+1, last = as.numeric(idx.before[1,"end"])+3) ) # Pident (percent identity) ... this fails if 100%, only captures 00 = 0, so check 0's manually idx.after <- str_locate(string = read_out, pattern = ", Gaps = ") df_blast$pident[i] <- as.numeric( substring(text = read_out, first = as.numeric(idx.after[1,"start"])-4, last = as.numeric(idx.after[1,"start"])-3) ) # Putative species / closest NCBI match? idx.before <- str_locate(string = read_out, pattern = "\r\n\r\n\r\n>N") df_blast$closest_ncbi_match[i] <- substring(text = read_out, first = as.numeric(idx.before[1,"end"]), last = as.numeric(idx.before[1,"end"])+60) } else { df_blast$score_bits[i] <- NA #df_blast$expect[i] <- NA df_blast$pident[i] <- NA df_blast$closest_ncbi_match[i] <- NA } print(paste0("Calculated OTU: ", df_blast$otu[i]," no ",i," of ",length(df_blast$otu))) } df_blast.Cont.all_KA <- df_blast sel <- grep(pattern = "Kineothrix alysoides", x = df_blast.Cont.all_KA$closest_ncbi_match) df_blast.Cont.all_KA <- df_blast.Cont.all_KA[sel, ] df_blast.Cont.all_KA[ ,c("otu", "closest_ncbi_match", "score_bits", "expect", "pident")] # otu closest_ncbi_match score_bits expect pident # 15 OTU_143 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 630 NA 95 # 25 OTU_481 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 614 NA 94 # 26 OTU_531 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 686 NA 97 # 47 OTU_5971 NR_156080.1 Kineothrix alysoides strain KNHs209 16S ribosomal 569 NA 92 # 52 OTU_9505 NR_156081.1 Kineothrix alysoides strain KNHs209 16S ribosomal 652 NA 96 temp1 <- df_blast.Cont.all_KA temp2 <- df_blast.Low.all_KA temp3 <- df_blast.High.all_KA ### calculate mean OTU rel abun, percent prevalence ### from earlier use % OTU relative abundance phyloseq objects relabun.fecalw0.Cont.dc.16s relabun.fecalw7.Cont.dc.16s relabun.cecal.Cont.dc.16s relabun.fecalw0.Low.dc.16s relabun.fecalw7.Low.dc.16s relabun.cecal.Low.dc.16s relabun.fecalw0.High.dc.16s relabun.fecalw7.High.dc.16s relabun.cecal.High.dc.16s df_blast.Cont.all_KA$rel_abun_fecalw0_mean <-NA df_blast.Cont.all_KA$rel_abun_fecalw0_n_nonzero <-NA df_blast.Cont.all_KA$rel_abun_fecalw0_n <-NA df_blast.Cont.all_KA$rel_abun_fecalw7_mean <-NA df_blast.Cont.all_KA$rel_abun_fecalw7_n_nonzero <-NA df_blast.Cont.all_KA$rel_abun_fecalw7_n <-NA df_blast.Cont.all_KA$rel_abun_cecal_mean <-NA df_blast.Cont.all_KA$rel_abun_cecal_n_nonzero <-NA df_blast.Cont.all_KA$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.Cont.all_KA$otu)) { #i<-1 this_otu <- as.character( df_blast.Cont.all_KA$otu[i] ) sel <- which( row.names(relabun.fecalw0.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont.all_KA$rel_abun_fecalw0_mean[i] <- mean( x ) df_blast.Cont.all_KA$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont.all_KA$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont.all_KA$rel_abun_fecalw7_mean[i] <- mean( x ) df_blast.Cont.all_KA$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont.all_KA$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.Cont.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.Cont.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Cont.all_KA$rel_abun_cecal_mean[i] <- mean( x ) df_blast.Cont.all_KA$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.Cont.all_KA$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } ## Low df_blast.Low.all_KA$rel_abun_fecalw0_mean <-NA df_blast.Low.all_KA$rel_abun_fecalw0_n_nonzero <-NA df_blast.Low.all_KA$rel_abun_fecalw0_n <-NA df_blast.Low.all_KA$rel_abun_fecalw7_mean <-NA df_blast.Low.all_KA$rel_abun_fecalw7_n_nonzero <-NA df_blast.Low.all_KA$rel_abun_fecalw7_n <-NA df_blast.Low.all_KA$rel_abun_cecal_mean <-NA df_blast.Low.all_KA$rel_abun_cecal_n_nonzero <-NA df_blast.Low.all_KA$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.Low.all_KA$otu)) { #i<-1 this_otu <- as.character( df_blast.Low.all_KA$otu[i] ) sel <- which( row.names(relabun.fecalw0.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low.all_KA$rel_abun_fecalw0_mean[i] <- mean( x ) df_blast.Low.all_KA$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.Low.all_KA$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low.all_KA$rel_abun_fecalw7_mean[i] <- mean( x ) df_blast.Low.all_KA$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.Low.all_KA$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.Low.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.Low.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.Low.all_KA$rel_abun_cecal_mean[i] <- mean( x ) df_blast.Low.all_KA$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.Low.all_KA$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } ## High df_blast.High.all_KA$rel_abun_fecalw0_mean <-NA df_blast.High.all_KA$rel_abun_fecalw0_n_nonzero <-NA df_blast.High.all_KA$rel_abun_fecalw0_n <-NA df_blast.High.all_KA$rel_abun_fecalw7_mean <-NA df_blast.High.all_KA$rel_abun_fecalw7_n_nonzero <-NA df_blast.High.all_KA$rel_abun_fecalw7_n <-NA df_blast.High.all_KA$rel_abun_cecal_mean <-NA df_blast.High.all_KA$rel_abun_cecal_n_nonzero <-NA df_blast.High.all_KA$rel_abun_cecal_n <-NA for (i in 1:length(df_blast.High.all_KA$otu)) { #i<-1 this_otu <- as.character( df_blast.High.all_KA$otu[i] ) sel <- which( row.names(relabun.fecalw0.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw0.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High.all_KA$rel_abun_fecalw0_mean[i] <- mean( x ) df_blast.High.all_KA$rel_abun_fecalw0_n_nonzero[i] <- length( x[subsel] ) df_blast.High.all_KA$rel_abun_fecalw0_n[i] <- length( x ) } sel <- which( row.names(relabun.fecalw7.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.fecalw7.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High.all_KA$rel_abun_fecalw7_mean[i] <- mean( x ) df_blast.High.all_KA$rel_abun_fecalw7_n_nonzero[i] <- length( x[subsel] ) df_blast.High.all_KA$rel_abun_fecalw7_n[i] <- length( x ) } sel <- which( row.names(relabun.cecal.High.dc.16s@otu_table) == this_otu) if (length(sel)==1){ x <- as.numeric(relabun.cecal.High.dc.16s@otu_table[sel, ]) subsel <- which(!x==0) df_blast.High.all_KA$rel_abun_cecal_mean[i] <- mean( x ) df_blast.High.all_KA$rel_abun_cecal_n_nonzero[i] <- length( x[subsel] ) df_blast.High.all_KA$rel_abun_cecal_n[i] <- length( x ) } print(paste0("completed OTU # ",i)) } df_blast.Cont.all_KA df_blast.Low.all_KA df_blast.High.all_KA dat <- data.frame(otu = rep( c(as.character(df_blast.Cont.all_KA$otu),as.character(df_blast.Low.all_KA$otu),as.character(df_blast.High.all_KA$otu)), times =3), Treatment = rep( c(rep("Control",times=dim(df_blast.Cont.all_KA)[1]),rep("Low",times=dim(df_blast.Low.all_KA)[1]),rep("High",times=dim(df_blast.High.all_KA)[1])), times = 3), pident = rep( c(df_blast.Cont.all_KA$pident,df_blast.Low.all_KA$pident,df_blast.High.all_KA$pident), times = 3), rel_abun_mean = c(df_blast.Cont.all_KA$rel_abun_fecalw0_mean,df_blast.Low.all_KA$rel_abun_fecalw0_mean,df_blast.High.all_KA$rel_abun_fecalw0_mean, df_blast.Cont.all_KA$rel_abun_fecalw7_mean,df_blast.Low.all_KA$rel_abun_fecalw7_mean,df_blast.High.all_KA$rel_abun_fecalw7_mean, df_blast.Cont.all_KA$rel_abun_cecal_mean,df_blast.Low.all_KA$rel_abun_cecal_mean,df_blast.High.all_KA$rel_abun_cecal_mean), n_nonzero = c(df_blast.Cont.all_KA$rel_abun_fecalw0_n_nonzero,df_blast.Low.all_KA$rel_abun_fecalw0_n_nonzero,df_blast.High.all_KA$rel_abun_fecalw0_n_nonzero, df_blast.Cont.all_KA$rel_abun_fecalw7_n_nonzero,df_blast.Low.all_KA$rel_abun_fecalw7_n_nonzero,df_blast.High.all_KA$rel_abun_fecalw7_n_nonzero, df_blast.Cont.all_KA$rel_abun_cecal_n_nonzero,df_blast.Low.all_KA$rel_abun_cecal_n_nonzero,df_blast.High.all_KA$rel_abun_cecal_n_nonzero), n = c(df_blast.Cont.all_KA$rel_abun_fecalw0_n,df_blast.Low.all_KA$rel_abun_fecalw0_n,df_blast.High.all_KA$rel_abun_fecalw0_n, df_blast.Cont.all_KA$rel_abun_fecalw7_n,df_blast.Low.all_KA$rel_abun_fecalw7_n,df_blast.High.all_KA$rel_abun_fecalw7_n, df_blast.Cont.all_KA$rel_abun_cecal_n,df_blast.Low.all_KA$rel_abun_cecal_n,df_blast.High.all_KA$rel_abun_cecal_n) ) dat$Sample_type <- c( rep("Faecal w0",times = length( c(as.character(df_blast.Cont.all_KA$otu),as.character(df_blast.Low.all_KA$otu),as.character(df_blast.High.all_KA$otu)) ) ), rep("Faecal w7",times = length( c(as.character(df_blast.Cont.all_KA$otu),as.character(df_blast.Low.all_KA$otu),as.character(df_blast.High.all_KA$otu)) ) ), rep( "Caecal",times = length( c(as.character(df_blast.Cont.all_KA$otu),as.character(df_blast.Low.all_KA$otu),as.character(df_blast.High.all_KA$otu)) ) ) ) dat$percent_prev <- 100*dat$n_nonzero/dat$n dat$log10_mean_rel_abun <- log10(dat$rel_abun_mean) str(dat) names(dat) # [1] "otu" "Treatment" "pident" "rel_abun_mean" "n_nonzero" "n" # [7] "Sample_type" "percent_prev" "log10_mean_rel_abun" dat$Treatment <- factor(dat$Treatment, levels = c("Control","Low","High"), ordered = TRUE) dat$Sample_type <- factor(dat$Sample_type, levels = c("Faecal w0","Faecal w7","Caecal"), ordered = TRUE) dat$pident <- factor(dat$pident) levels(dat$pident) # "92" "94" "95" "96" "97" str(dat) # 'data.frame': 36 obs. of 9 variables: # $ otu : Factor w/ 10 levels "OTU_143","OTU_37",..: 1 3 4 7 9 10 2 8 3 5 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 2 3 3 3 3 ... # $ pident : Factor w/ 5 levels "92","94","95",..: 3 2 5 1 4 2 5 4 2 4 ... # $ rel_abun_mean : num 0.00334 0.19496 0.00291 0.00188 0.00448 ... # $ n_nonzero : int 3 10 7 3 7 5 9 17 12 11 ... # $ n : int 18 18 18 18 18 18 18 18 18 18 ... # $ Sample_type : Ord.factor w/ 3 levels "Faecal w0"<"Faecal w7"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ percent_prev : num 16.7 55.6 38.9 16.7 38.9 ... # $ log10_mean_rel_abun: num -2.48 -0.71 -2.54 -2.72 -2.35 ... dat$group <- NA sel <- grep(pattern = "Faecal", x = dat$Sample_type) dat$group[sel] <- "Faecal" sel <- grep(pattern = "Caecal", x = dat$Sample_type) dat$group[sel] <- "Caecal" dat$group <- factor(dat$group, levels = c("Faecal","Caecal"), ordered = TRUE) dat$link <- paste0(dat$otu,"_",dat$Treatment,"_",dat$group) cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2" ) shapes <- c( "Faecal w0"=1, "Faecal w7"=16, "Caecal"=17 ) pid_sizes <- c("92"= 1.5, "94"= 2, "95"= 2.5, "96"= 3, "97"= 3.5) p <- ggplot(data=dat, aes(x=log10_mean_rel_abun, y = percent_prev, color = Treatment, shape = Sample_type)) + geom_point(aes(size = pident)) + geom_line(aes(group=link)) + scale_shape_manual(values = shapes, name = "Sample\ntype") + scale_size_manual(values = pid_sizes, name = "Percent\nidentity") + scale_color_manual(values = cols) + theme_classic() + labs(x = "log10(mean % OTU rel. abun.)", y = "Prevalence (%)" ) + facet_wrap(~group) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.7)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.7)), axis.title.x = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 2,l = 0,"pt"), size = rel(0.8)), legend.key.size = unit(2, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), legend.box.spacing = unit(2, "pt"), legend.box.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.spacing = unit(4, "pt") ) p ggsave(plot=p, filename = paste0("plots/","All_KA_pc_prevalence-vs-log10-mean-pc-OTU-rel-abun-Faecal-vs-Caecal-vFINAL.tiff"), width = 10, height = 7, units = "cm", dpi = 600, compression = "lzw") # this plot presents all mice. See plot later for male vs female breakdown ### Rel abun K. alysoides vs OF centre times - most anxious Females ### Analyse total % of K. alysoides-like OTUs in female mice with the lowest Open Field centre times ## use % OTU relative abundance phyloseq objects dim( relabun.fecalw7.Cont.dc.16s@sam_data ) # 18 20 dim( relabun.cecal.Cont.dc.16s@sam_data ) # 18 20 dim( relabun.fecalw7.Low.dc.16s@sam_data ) # 17 20 dim( relabun.cecal.Low.dc.16s@sam_data ) # 17 20 dim( relabun.fecalw7.High.dc.16s@sam_data ) # 18 20 dim( relabun.cecal.High.dc.16s@sam_data ) # 18 20 ## identify subjects str(of.post.COPY) sel <- which(of.post.COPY$Sex=="Female") beh_fem <- of.post.COPY[sel, ] str(beh_fem) #'data.frame': 27 obs. of 38 variables: dat <- data.frame(mouseID = c(as.character(relabun.fecalw7.Cont.dc.16s@sam_data$mouseID), as.character(relabun.cecal.Cont.dc.16s@sam_data$mouseID), as.character(relabun.fecalw7.Low.dc.16s@sam_data$mouseID), as.character(relabun.cecal.Low.dc.16s@sam_data$mouseID), as.character(relabun.fecalw7.High.dc.16s@sam_data$mouseID), as.character(relabun.cecal.High.dc.16s@sam_data$mouseID)), Treatment = c(rep("Control",times=18*2), rep("Low",times=17*2), rep("High",times=18*2)), Sample_type = c(rep("Faecal w7",times=18),rep("Caecal",times=18), rep("Faecal w7",times=17),rep("Caecal",times=17), rep("Faecal w7",times=18),rep("Caecal",times=18)) ) dat$percent_all_KA <- NA dat$Centre_time <- NA ## list all KA-like OTUs all_KA_OTUs <- unique( c( as.character( df_blast.Cont.all_KA$otu ), as.character( df_blast.Low.all_KA$otu ), as.character( df_blast.High.all_KA$otu ) ) ) all_KA_OTUs # [1] "OTU_143" "OTU_481" "OTU_531" "OTU_5971" "OTU_9505" "OTU_9639" "OTU_37" "OTU_60" "OTU_5520" # [10] "OTU_5790" names(dat) # "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" for (i in 1:dim(dat)[1]) { # i<-1 if (dat$Treatment[i]=="Control" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.Cont.dc.16s } else if (dat$Treatment[i]=="Control" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Cont.dc.16s } else if (dat$Treatment[i]=="Low" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.Low.dc.16s } else if (dat$Treatment[i]=="Low" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Low.dc.16s } else if (dat$Treatment[i]=="High" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.High.dc.16s # else: #(dat$Treatment[i]=="High" & dat$Sample_type[i]=="Caecal") } else { phy_obj <- relabun.cecal.High.dc.16s } this_subject <- as.character( dat$mouseID[i] ) # this_subject: "C1m3" sel_phy_obj <- prune_samples( phy_obj@sam_data$mouseID == this_subject, phy_obj ) # sample_names(sel_phy_obj): "C1m3T16" sel_KA <- prune_taxa( taxa_names(sel_phy_obj) %in% all_KA_OTUs, sel_phy_obj ) #sel_KA@otu_table dat$percent_all_KA[i] <- sum( taxa_sums(sel_KA@otu_table) ) sel <- which(as.character(beh_fem$ID) == as.character(dat$mouseID[i])) if (length(sel)==1) { dat$Centre_time[i] <- beh_fem$Centre_time[sel] } print(paste0("mouse: ", dat$mouseID[i]," ",beh_fem$ID[sel] )) } ## male are not present in 'dat' ok <- complete.cases(dat) sel <- which(ok==TRUE) dat <- dat[sel, ] dat$lowest_2 <- NA dat$lowest_3 <- NA dat$lowest_4 <- NA dat$lowest_5 <- NA x <- 2 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_2=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_2=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.165264 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.8997448 # # # x <- 3 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_3=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_3=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.2536777 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.7306088 # # # x <- 4 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_4=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_4=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.08290818 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.7595732 # # # x <- 5 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_5=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_5=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.2923509 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.3019307 ## No relationship found for faecal w7. ## Strip back to Caecal samples dat_c <- dat[ which(dat$Sample_type=="Caecal"), ] names(dat_c) # [1] "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" "lowest_2" # [7] "lowest_3" "lowest_4" "lowest_5" sel <- which(dat_c$lowest_2=="Caecal") dat_c$lowest_2[sel] <- 2 sel <- which(dat_c$lowest_3=="Caecal") dat_c$lowest_3[sel] <- 3 sel <- which(dat_c$lowest_4=="Caecal") dat_c$lowest_4[sel] <- 4 sel <- which(dat_c$lowest_5=="Caecal") dat_c$lowest_5[sel] <- 5 ## reformat for plotting with ggplot facet_grid names(dat_c) # [1] "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" "lowest_2" # [7] "lowest_3" "lowest_4" "lowest_5" new_names <- c("mouseID", "Treatment", "Sample_type", "percent_all_KA", "Centre_time", "lowest_X") get_cols <- c("lowest_2", "lowest_3", "lowest_4", "lowest_5") most_anxious_x <- c(2:5) length(most_anxious_x) # 4 dat_c.plot <- data.frame(matrix(nrow = 1, ncol = length( new_names ))) names(dat_c.plot) <- new_names for (i in 1:length(most_anxious_x)) { #i<-1 # getsubset of dataframe which examines most anxious X mice in each treatment dat_sel <- dat_c[ ,c( names(dat_c)[1:5],get_cols[i] ) ] subsel <- which(dat_sel[ , get_cols[i] ] == most_anxious_x[i]) names(dat_sel) <- names(dat_c.plot) dat_c.plot <- rbind(dat_c.plot, dat_sel[subsel, ]) } # remove NA first row dat_c.plot <- dat_c.plot[-1, ] str(dat_c.plot) # 'data.frame': 42 obs. of 6 variables: # $ mouseID : chr "C4m2" "C2m1" "L3m1" "L5m2" ... # $ Treatment : chr "Control" "Control" "Low" "Low" ... # $ Sample_type : chr "Caecal" "Caecal" "Caecal" "Caecal" ... # $ percent_all_KA: num 0.564 0.87 0.927 1.288 1.222 ... # $ Centre_time : num 3.3 10.4 13 18.5 28.5 22.6 3.3 10.6 10.4 20.4 ... # $ lowest_X : chr "2" "2" "2" "2" ... 3*2 + 3*3 + 3*4 + 3*5 # 42 temp <- dat_c.plot cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2" ) levels(factor(dat_c.plot$lowest_X)) # [1] "2" "3" "4" "5" #dat_c.plot$lowest_X <- factor(dat_c.plot$lowest_X, levels = c("2", "3", "4", "5"), labels = c("2 most\nanxious", "3 most\nanxious", "4 most\nanxious", "5 most\nanxious") ) dat_c.plot$lowest_X <- factor(dat_c.plot$lowest_X, levels = c("2", "3", "4", "5"), labels = c("Female\n2 most anxious", "Female\n3 most anxious", "Female\n4 most anxious", "Female\n5 most anxious") ) # annotation for r and n anno <- data.frame(x = c(1.5, 1.5, 1.5, 1.5), y = c(0, 0, 0, 0), #lowest_X = c("2 most\nanxious", "3 most\nanxious", "4 most\nanxious", "5 most\nanxious"), lowest_X = c("Female\n2 most anxious", "Female\n3 most anxious", "Female\n4 most anxious", "Female\n5 most anxious"), r = paste0("r = ", round( c(0.8997448, 0.7306088, 0.7595732, 0.3019307),digits = 2 )), n = paste0("n = ", c(2*3, 3*3, 4*3, 5*3)) ) p <- ggplot(data=dat_c.plot, aes(x=percent_all_KA, y= Centre_time, color = Treatment)) + geom_point(size=1) + scale_colour_manual(values = cols) + #stat_smooth(aes(x=percent_all_KA, y= Centre_time), inherit.aes = FALSE, show.legend = FALSE) + geom_smooth(aes(x=percent_all_KA, y= Centre_time), method='lm', inherit.aes = FALSE, show.legend = FALSE, fill = "#bdbdbd", size = 0.5) + #geom_text( data = anno, aes(x=x, y=y, label = r), size = 2.5, inherit.aes = FALSE) + geom_text( data = anno, aes(x=x, y=y, label = paste0(r,"\n",n)), size = 2.5, inherit.aes = FALSE) + theme_classic() + #labs(x = "Total % OTU rel. abun. K. alysoides", y = "Time in centre (s)") + labs(x = parse(text='"Total % OTU rel. abun. "*italic("K. alysoides")'), y = "Time in centre (s)") + theme(legend.position="bottom") + facet_grid(~lowest_X) + #, scales="free_x", space="free_x") + theme( plot.margin = margin(t = 5, r = 5, b = 0, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), axis.title.x = element_text(margin=margin(t = 2,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 5,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) p dev.print(tiff, file = paste0("plots/","Total-OTU-rel-abun-KA-vs-Base-anxiety-Most-anxious-in-Treatments-vFINAL.tiff"), width = 12, height = 6, units = "cm", res=600, compression="lzw") # NO LEGEND or X-AXIS TITLE p <- ggplot(data=dat_c.plot, aes(x=percent_all_KA, y= Centre_time, color = Treatment)) + geom_point(size=1) + scale_colour_manual(values = cols) + #stat_smooth(aes(x=percent_all_KA, y= Centre_time), inherit.aes = FALSE, show.legend = FALSE) + geom_smooth(aes(x=percent_all_KA, y= Centre_time), method='lm', inherit.aes = FALSE, show.legend = FALSE, fill = "#bdbdbd", size = 0.5) + #geom_text( data = anno, aes(x=x, y=y, label = r), size = 2.5, inherit.aes = FALSE) + geom_text( data = anno, aes(x=x, y=y, label = paste0(r,"\n",n)), size = 2.5, inherit.aes = FALSE, lineheight=0.9) + theme_classic() + #labs(x = "Total % OTU rel. abun. K. alysoides", y = "Time in centre (s)") + #labs(x = parse(text='"Total % OTU rel. abun. "*italic("K. alysoides")'), y = "Time in centre (s)") + labs(x = NULL , y = "Time in centre (s)") + theme(legend.position="none") + facet_grid(~lowest_X) + #, scales="free_x", space="free_x") + theme( plot.margin = margin(t = 5, r = 5, b = 5, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), axis.title.x = element_text(margin=margin(t = 2,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)) # legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), # legend.key.size = unit(5, "pt"), # legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), # legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), # legend.margin = margin(t = 0,r = 0,b = 5,l = 0,"pt"), # legend.box.spacing = unit(5, "pt") ) p grid.text(label = "A" , x = unit(0.03, "npc") , y = unit(0.95,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Total-OTU-rel-abun-KA-vs-Base-anxiety-Most-anxious-in-Treatments-NO-TITLE-NO-LEGEND-A-vFINAL.tiff"), width = 12, height = 5, units = "cm", res=600, compression="lzw") ### Also compare '1/3 most anxious' vs. '1/3 least anxious' based on top third and bottom third of Open Field centre times ## identify subjects str(of.post.COPY) sel <- which(of.post.COPY$Sex=="Female") beh_fem <- of.post.COPY[sel, ] str(beh_fem) #'data.frame': 27 obs. of 38 variables: dat <- data.frame(mouseID = c(as.character(relabun.cecal.Cont.dc.16s@sam_data$mouseID), as.character(relabun.cecal.Low.dc.16s@sam_data$mouseID), as.character(relabun.cecal.High.dc.16s@sam_data$mouseID)), Treatment = c(rep("Control",times=18), rep("Low",times=17), rep("High",times=18)), Sample_type = c(rep("Caecal",times=18), rep("Caecal",times=17), rep("Caecal",times=18)) ) dat$percent_all_KA <- NA dat$Centre_time <- NA ## list all KA-like OTUs all_KA_OTUs <- unique( c( as.character( df_blast.Cont.all_KA$otu ), as.character( df_blast.Low.all_KA$otu ), as.character( df_blast.High.all_KA$otu ) ) ) all_KA_OTUs # [1] "OTU_143" "OTU_481" "OTU_531" "OTU_5971" "OTU_9505" "OTU_9639" "OTU_37" "OTU_60" "OTU_5520" # [10] "OTU_5790" names(dat) # "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" for (i in 1:dim(dat)[1]) { # i<-1 if (dat$Treatment[i]=="Control" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Cont.dc.16s } else if (dat$Treatment[i]=="Low" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Low.dc.16s } else { phy_obj <- relabun.cecal.High.dc.16s } this_subject <- as.character( dat$mouseID[i] ) # this_subject: "C1m3" sel_phy_obj <- prune_samples( phy_obj@sam_data$mouseID == this_subject, phy_obj ) # sample_names(sel_phy_obj): "C6m2Ce" sel_KA <- prune_taxa( taxa_names(sel_phy_obj) %in% all_KA_OTUs, sel_phy_obj ) #sel_KA@otu_table dat$percent_all_KA[i] <- sum( taxa_sums(sel_KA@otu_table) ) sel <- which(as.character(beh_fem$ID) == as.character(dat$mouseID[i])) if (length(sel)==1) { dat$Centre_time[i] <- beh_fem$Centre_time[sel] } print(paste0("mouse: ", dat$mouseID[i]," ",beh_fem$ID[sel] )) } ## note: males are not present in 'dat' ok <- complete.cases(dat) sel <- which(ok==TRUE) dat <- dat[sel, ] dat$group <- NA ## define 'anxious' and 'less anxious' groups based on ## lower and upper thirds of Open Field centre times quantile(beh_fem$Centre_time, probs = c(1/3, 2/3) ) # 33.33333% 66.66667% # 26.56667 51.76667 beh_fem$group <- NA sel <- which(beh_fem$Centre_time <= quantile(beh_fem$Centre_time, probs = c(1/3) )) beh_fem$group[sel] <- "anxious" sel <- which(beh_fem$Centre_time >= quantile(beh_fem$Centre_time, probs = c(2/3) )) beh_fem$group[sel] <- "less anxious" # remove middle third ok <- complete.cases(beh_fem) sel <- which(ok==TRUE) beh_fem_groups <- beh_fem[sel, ] dim(beh_fem_groups) # 18 39 beh_fem_groups[ ,c("ID","Treatment","Centre_time","group")] # ID Treatment Centre_time group # 4 C2m1 Control 10.4 anxious # 5 C2m2 Control 162.5 less anxious # 6 C2m3 Control 88.7 less anxious # 10 C4m1 Control 12.1 anxious # 11 C4m2 Control 3.3 anxious # 13 C5m1 Control 10.6 anxious # 15 C5m3 Control 25.5 anxious # 27 H3m3 High 61.4 less anxious # 28 H4m1 High 22.6 anxious # 29 H4m2 High 90.0 less anxious # 34 H6m1 High 97.6 less anxious # 36 H6m3 High 57.7 less anxious # 38 L1m2 Low 82.5 less anxious # 43 L3m1 Low 13.0 anxious # 44 L3m2 Low 126.8 less anxious # 45 L3m3 Low 20.4 anxious # 50 L5m2 Low 18.5 anxious # 51 L5m3 Low 113.0 less anxious anxious_mice <- as.character( beh_fem_groups$ID[ which(beh_fem_groups$group=="anxious") ] ) anxious_mice # "C2m1" "C4m1" "C4m2" "C5m1" "C5m3" "H4m1" "L3m1" "L3m3" "L5m2" less_anxious_mice <- as.character( beh_fem_groups$ID[ which(beh_fem_groups$group=="less anxious") ] ) less_anxious_mice # "C2m2" "C2m3" "H3m3" "H4m2" "H6m1" "H6m3" "L1m2" "L3m2" "L5m3" sel <- which(dat$mouseID %in% anxious_mice) sel # 1 3 4 6 9 11 12 16 22 dat$mouseID[sel] # C4m2 C4m1 C5m1 C5m3 C2m1 L3m3 L3m1 L5m2 H4m1 dat$group[sel] <- "anxious" sel <- which(dat$mouseID %in% less_anxious_mice) sel # 7 8 10 13 17 19 24 25 26 dat$mouseID[sel] # C2m2 C2m3 L3m2 L1m2 L5m3 H3m3 H4m2 H6m1 H6m3 dat$group[sel] <- "less anxious" # remove missing data ok <- complete.cases(dat) sel <- which(ok==TRUE) dat_groups <- dat[sel, ] choose <- which(dat_groups$group=="anxious") plot(dat_groups$percent_all_KA[choose], dat_groups$Centre_time[choose]) cor(dat_groups$percent_all_KA[choose], dat_groups$Centre_time[choose]) # 0.5847493 choose <- which(dat_groups$group=="less anxious") plot(dat_groups$percent_all_KA[choose], dat_groups$Centre_time[choose]) cor(dat_groups$percent_all_KA[choose], dat_groups$Centre_time[choose]) # -0.389352 str(dat_groups) # 'data.frame': 18 obs. of 6 variables: # $ mouseID : Factor w/ 53 levels "C1m1","C1m2",..: 11 10 13 15 5 6 4 43 44 42 ... # $ Treatment : Factor w/ 3 levels "Control","High",..: 1 1 1 1 1 1 1 3 3 3 ... # $ Sample_type : Factor w/ 1 level "Caecal": 1 1 1 1 1 1 1 1 1 1 ... # $ percent_all_KA: num 0.564 1.05 1.14 1.447 0.486 ... # $ Centre_time : num 3.3 12.1 10.6 25.5 162.5 ... # $ group : chr "anxious" "anxious" "anxious" "anxious" ... table(dat_groups$group) # Anxious Less anxious # 9 9 cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2" ) #dat_groups$group <- factor(dat_groups$group, levels = c("anxious", "less anxious"), labels = c("Anxious", "Less anxious") ) #dat_groups$group <- factor(dat_groups$group, levels = c("anxious", "less anxious"), labels = c("1/3 most\nanxious", "1/3 least\nanxious") ) dat_groups$group <- factor(dat_groups$group, levels = c("anxious", "less anxious"), labels = c("Female\n1/3 most anxious", "Female\n1/3 least anxious") ) # annotation for r and n anno <- data.frame(x = c(1.1, 1.1), y = c(5, 140), #group = c("Anxious", "Less anxious"), #group = c("1/3 most\nanxious", "1/3 least\nanxious"), group = c("Female\n1/3 most anxious", "Female\n1/3 least anxious"), r = paste0("r = ", round( c(0.5847493, -0.389352),digits = 2 )), n = paste0("n = ", c(9,9)) ) p <- ggplot(data=dat_groups, aes(x=percent_all_KA, y= Centre_time, color = Treatment)) + geom_point(size=1) + scale_colour_manual(values = cols) + #stat_smooth(aes(x=percent_all_KA, y= Centre_time), inherit.aes = FALSE, show.legend = FALSE) + geom_smooth(aes(x=percent_all_KA, y= Centre_time), method='lm', inherit.aes = FALSE, show.legend = FALSE, fill = "#bdbdbd", size = 0.5) + #geom_text( data = anno, aes(x=x, y=y, label = r), size = 2.5, inherit.aes = FALSE) + geom_text( data = anno, aes(x=x, y=y, label = paste0(r,"\n",n)), size = 2.5, inherit.aes = FALSE, lineheight=0.9) + theme_classic() + #labs(x = "Total % OTU rel. abun. K. alysoides", y = "Time in centre (s)") + #labs(x = parse(text='"Total % OTU rel. abun. "*italic("K. alysoides")'), y = "Time in centre (s)") + labs(x = NULL, y = "Time in centre (s)") + theme(legend.position="right") + #facet_grid(~group) + #, scales="free_x", space="free_x") + facet_wrap(~group, scales = "free_y") + #, scales="free_x", space="free_x") + theme( plot.margin = margin(t = 5, r = 5, b = 0, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), #axis.title.x = element_text(margin=margin(t = 2,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), axis.title.x = element_blank(), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 5,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) p grid.text(label = "B" , x = unit(0.03, "npc") , y = unit(0.95,"npc"), gp=gpar(fontsize=11, fontface="bold")) dev.print(tiff, file = paste0("plots/","Total-OTU-rel-abun-KA-vs-Base-anxiety-Anxious-vs-Less-anxious-B-vFINAL.tiff"), width = 12, height = 5, units = "cm", res=600, compression="lzw") ### Plot X most anxious in males ### Analyse total % of K. alysoides-like OTUs in female mice with the lowest Open Field centre times ## use % OTU relative abundance phyloseq objects dim( relabun.fecalw7.Cont.dc.16s@sam_data ) # 18 20 dim( relabun.cecal.Cont.dc.16s@sam_data ) # 18 20 dim( relabun.fecalw7.Low.dc.16s@sam_data ) # 17 20 dim( relabun.cecal.Low.dc.16s@sam_data ) # 17 20 dim( relabun.fecalw7.High.dc.16s@sam_data ) # 18 20 dim( relabun.cecal.High.dc.16s@sam_data ) # 18 20 ## identify subjects str(of.post.COPY) sel <- which(of.post.COPY$Sex=="Male") beh_male <- of.post.COPY[sel, ] str(beh_male) #'data.frame': 27 obs. of 38 variables: dat <- data.frame(mouseID = c(as.character(relabun.fecalw7.Cont.dc.16s@sam_data$mouseID), as.character(relabun.cecal.Cont.dc.16s@sam_data$mouseID), as.character(relabun.fecalw7.Low.dc.16s@sam_data$mouseID), as.character(relabun.cecal.Low.dc.16s@sam_data$mouseID), as.character(relabun.fecalw7.High.dc.16s@sam_data$mouseID), as.character(relabun.cecal.High.dc.16s@sam_data$mouseID)), Treatment = c(rep("Control",times=18*2), rep("Low",times=17*2), rep("High",times=18*2)), Sample_type = c(rep("Faecal w7",times=18),rep("Caecal",times=18), rep("Faecal w7",times=17),rep("Caecal",times=17), rep("Faecal w7",times=18),rep("Caecal",times=18)) ) dat$percent_all_KA <- NA dat$Centre_time <- NA ## list all KA-like OTUs all_KA_OTUs <- unique( c( as.character( df_blast.Cont.all_KA$otu ), as.character( df_blast.Low.all_KA$otu ), as.character( df_blast.High.all_KA$otu ) ) ) all_KA_OTUs # [1] "OTU_143" "OTU_481" "OTU_531" "OTU_5971" "OTU_9505" "OTU_9639" "OTU_37" "OTU_60" "OTU_5520" # [10] "OTU_5790" names(dat) # "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" for (i in 1:dim(dat)[1]) { # i<-1 if (dat$Treatment[i]=="Control" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.Cont.dc.16s } else if (dat$Treatment[i]=="Control" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Cont.dc.16s } else if (dat$Treatment[i]=="Low" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.Low.dc.16s } else if (dat$Treatment[i]=="Low" & dat$Sample_type[i]=="Caecal") { phy_obj <- relabun.cecal.Low.dc.16s } else if (dat$Treatment[i]=="High" & dat$Sample_type[i]=="Faecal w7") { phy_obj <- relabun.fecalw7.High.dc.16s # else: #(dat$Treatment[i]=="High" & dat$Sample_type[i]=="Caecal") } else { phy_obj <- relabun.cecal.High.dc.16s } this_subject <- as.character( dat$mouseID[i] ) # this_subject: "C1m3" sel_phy_obj <- prune_samples( phy_obj@sam_data$mouseID == this_subject, phy_obj ) # sample_names(sel_phy_obj): "C1m3T16" sel_KA <- prune_taxa( taxa_names(sel_phy_obj) %in% all_KA_OTUs, sel_phy_obj ) #sel_KA@otu_table dat$percent_all_KA[i] <- sum( taxa_sums(sel_KA@otu_table) ) sel <- which(as.character(beh_male$ID) == as.character(dat$mouseID[i])) if (length(sel)==1) { dat$Centre_time[i] <- beh_male$Centre_time[sel] } print(paste0("mouse: ", dat$mouseID[i]," ",beh_male$ID[sel] )) } ## now females are not present in 'dat' ok <- complete.cases(dat) sel <- which(ok==TRUE) dat <- dat[sel, ] dat$lowest_2 <- NA dat$lowest_3 <- NA dat$lowest_4 <- NA dat$lowest_5 <- NA x <- 2 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_2=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_2=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.4160884 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.6276367 # # # x <- 3 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_3=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_3=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.05388064 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.4841859 # # # x <- 4 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_4=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_4=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.1830594 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.6719421 # # # x <- 5 sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Control" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="Low" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Faecal w7") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Faecal w7" sel <- which(dat$Treatment=="High" & dat$Sample_type=="Caecal") subsel <- which(dat$Centre_time[sel] %in% sort(dat$Centre_time[sel])[1:x]) dat[sel[subsel], paste0("lowest_",x)] <- "Caecal" choose <- which(dat$lowest_5=="Faecal w7") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) choose <- which(dat$lowest_5=="Caecal") plot(dat$percent_all_KA[choose], dat$Centre_time[choose]) # Faecal w7 cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # -0.08933896 # Caecal cor(dat$percent_all_KA[choose], dat$Centre_time[choose]) # 0.5919562 ## Poor relationship found for faecal w7. ## Strip back to Caecal samples dat_c <- dat[ which(dat$Sample_type=="Caecal"), ] names(dat_c) # [1] "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" "lowest_2" # [7] "lowest_3" "lowest_4" "lowest_5" sel <- which(dat_c$lowest_2=="Caecal") dat_c$lowest_2[sel] <- 2 sel <- which(dat_c$lowest_3=="Caecal") dat_c$lowest_3[sel] <- 3 sel <- which(dat_c$lowest_4=="Caecal") dat_c$lowest_4[sel] <- 4 sel <- which(dat_c$lowest_5=="Caecal") dat_c$lowest_5[sel] <- 5 ## reformat for plotting with ggplot facet_grid names(dat_c) # [1] "mouseID" "Treatment" "Sample_type" "percent_all_KA" "Centre_time" "lowest_2" # [7] "lowest_3" "lowest_4" "lowest_5" new_names <- c("mouseID", "Treatment", "Sample_type", "percent_all_KA", "Centre_time", "lowest_X") get_cols <- c("lowest_2", "lowest_3", "lowest_4", "lowest_5") most_anxious_x <- c(2:5) length(most_anxious_x) # 4 dat_c.plot <- data.frame(matrix(nrow = 1, ncol = length( new_names ))) names(dat_c.plot) <- new_names for (i in 1:length(most_anxious_x)) { #i<-1 # getsubset of dataframe which examines most anxious X mice in each treatment dat_sel <- dat_c[ ,c( names(dat_c)[1:5],get_cols[i] ) ] subsel <- which(dat_sel[ , get_cols[i] ] == most_anxious_x[i]) names(dat_sel) <- names(dat_c.plot) dat_c.plot <- rbind(dat_c.plot, dat_sel[subsel, ]) } # remove NA first row dat_c.plot <- dat_c.plot[-1, ] str(dat_c.plot) # 'data.frame': 42 obs. of 6 variables: # $ mouseID : chr "C6m3" "C1m1" "L2m2" "L4m1" ... # $ Treatment : chr "Control" "Control" "Low" "Low" ... # $ Sample_type : chr "Caecal" "Caecal" "Caecal" "Caecal" ... # $ percent_all_KA: num 2.069 0.679 1.269 1.397 0.708 ... # $ Centre_time : num 21.8 13.2 9 11.4 13.8 21.2 37.5 21.8 13.2 13.2 ... # $ lowest_X : chr "2" "2" "2" "2" ... 3*2 + 3*3 + 3*4 + 3*5 # 42 temp <- dat_c.plot cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2" ) levels(factor(dat_c.plot$lowest_X)) # [1] "2" "3" "4" "5" #dat_c.plot$lowest_X <- factor(dat_c.plot$lowest_X, levels = c("2", "3", "4", "5"), labels = c("2 most\nanxious", "3 most\nanxious", "4 most\nanxious", "5 most\nanxious") ) dat_c.plot$lowest_X <- factor(dat_c.plot$lowest_X, levels = c("2", "3", "4", "5"), labels = c("Male\n2 most anxious", "Male\n3 most anxious", "Male\n4 most anxious", "Male\n5 most anxious") ) # annotation for r and n anno <- data.frame(x = c(2.0, 2.0, 2.0, 2.0), y = c(0, 0, 0, 0), #lowest_X = c("2 most\nanxious", "3 most\nanxious", "4 most\nanxious", "5 most\nanxious"), lowest_X = c("Male\n2 most anxious", "Male\n3 most anxious", "Male\n4 most anxious", "Male\n5 most anxious"), r = paste0("r = ", round( c(0.6276367, 0.4841859, 0.6719421, 0.5919562),digits = 2 )), n = paste0("n = ", c(2*3, 3*3, 4*3, 5*3)) ) p <- ggplot(data=dat_c.plot, aes(x=percent_all_KA, y= Centre_time, color = Treatment)) + geom_point(size=1) + scale_colour_manual(values = cols) + #stat_smooth(aes(x=percent_all_KA, y= Centre_time), inherit.aes = FALSE, show.legend = FALSE) + geom_smooth(aes(x=percent_all_KA, y= Centre_time), method='lm', inherit.aes = FALSE, show.legend = FALSE, fill = "#bdbdbd", size = 0.5) + #geom_text( data = anno, aes(x=x, y=y, label = r), size = 2.5, inherit.aes = FALSE) + geom_text( data = anno, aes(x=x, y=y, label = paste0(r,"\n",n)), size = 2.5, inherit.aes = FALSE, lineheight=0.9) + theme_classic() + #labs(x = "Total % OTU rel. abun. K. alysoides", y = "Time in centre (s)") + labs(x = parse(text='"Total % OTU rel. abun. "*italic("K. alysoides")'), y = "Time in centre (s)") + #theme(legend.position="bottom") + theme(legend.position="none") + facet_grid(~lowest_X) + #, scales="free_x", space="free_x") + theme( plot.margin = margin(t = 5, r = 5, b = 0, l = 5, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), vjust=0.5, size = rel(0.7)), axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 5,"pt"), size = rel(0.6)), axis.title.x = element_text(margin=margin(t = 2,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)) #, # legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), # legend.key.size = unit(5, "pt"), # legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), # legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), # legend.margin = margin(t = 0,r = 0,b = 5,l = 0,"pt"), # legend.box.spacing = unit(5, "pt") ) p grid.text(label = "C" , x = unit(0.03, "npc") , y = unit(0.95,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Total-OTU-rel-abun-KA-vs-Base-anxiety-Most-anxious-in-Treatments-MALES-vFINAL.tiff"), width = 12, height = 5, units = "cm", res=600, compression="lzw") ### Compare all K. alysoides that increase between Wk0 to Wk7 ### examine quantity and relative abundance of OTUs unique(clean.16s@sam_data$samp_type) # "cecal" "fecal" "air" "soil" "fresh bedding" fecalw0w7cecal.dc.16s <- prune_samples( clean.16s@sam_data$samp_type %in% c("fecal","cecal"), clean.16s ) fecalw0w7cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2730 taxa and 160 samples ] # sample_data() Sample Data: [ 160 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2730 taxa by 7 taxonomic ranks ] min(sample_sums(fecalw0w7cecal.dc.16s)) # 11450 min(taxa_sums(fecalw0w7cecal.dc.16s)) # 0 # prune taxa that have zero sequence reads fecalw0w7cecal.dc.16s <- prune_taxa(taxa = taxa_sums(fecalw0w7cecal.dc.16s) > 0, x = fecalw0w7cecal.dc.16s) fecalw0w7cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 817 taxa and 160 samples ] # sample_data() Sample Data: [ 160 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 817 taxa by 7 taxonomic ranks ] min(taxa_sums(fecalw0w7cecal.dc.16s)) # 1 table(fecalw0w7cecal.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 9 9 9 9 9 9 9 9 9 9 9 9 8 8 9 9 9 9 table(fecalw0w7cecal.dc.16s@sam_data$Sex) # female male # 80 80 ## Normalise by converting to relative abundance ... phy_obj <- fecalw0w7cecal.dc.16s relabun.phy_obj <- transform_sample_counts(phy_obj, function(x) 100*x / sum(x) ) relabun.phy_obj # phyloseq-class experiment-level object # otu_table() OTU Table: [ 817 taxa and 160 samples ] # sample_data() Sample Data: [ 160 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 817 taxa by 7 taxonomic ranks ] # prune taxa - keep only K. alysoides that increase between Wk0 to Wk7 all_KA_OTUs # [1] "OTU_143" "OTU_481" "OTU_531" "OTU_5971" "OTU_9505" "OTU_9639" "OTU_37" "OTU_60" "OTU_5520" # [10] "OTU_5790" KAonly.relabun.phy_obj <- prune_taxa( taxa_names(relabun.phy_obj) %in% all_KA_OTUs, relabun.phy_obj ) KAonly.relabun.phy_obj # phyloseq-class experiment-level object # otu_table() OTU Table: [ 10 taxa and 160 samples ] # sample_data() Sample Data: [ 160 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 10 taxa by 7 taxonomic ranks ] dat <- data.frame(samp_id=sample_names(KAonly.relabun.phy_obj) ) dat$Treatment <- substring(text = dat$samp_id, first = 1, last = 1) dat$Treatment <- factor(dat$Treatment, levels=c("C","L","H"), labels = c("Control","Low","High"), ordered = TRUE) dat$mouseID <- substring(text = dat$samp_id, first = 1, last = 4) str(mice_info) dat <- merge.data.frame(x=dat, y=mice_info[ ,c("mouseID","Sex")], by = "mouseID" ) dat$Sex <- factor(dat$Sex, levels=c("female","male"), labels=c("Female","Male"), ordered = TRUE) dat$samp_type <- NA dat$Samp_group <- NA sel <- grep(pattern = "T01", x = dat$samp_id) dat$samp_type[sel] <- "Faecal w0" dat$Samp_group[sel] <- "Faecal" sel <- grep(pattern = "T16", x = dat$samp_id) dat$samp_type[sel] <- "Faecal w7" dat$Samp_group[sel] <- "Faecal" sel <- grep(pattern = "Ce", x = dat$samp_id) dat$samp_type[sel] <- "Caecal" dat$Samp_group[sel] <- "Caecal" #dat$group <- paste0(dat$Sex,"\n",dat$Samp_group) dat$group <- paste0(dat$Sex," ",dat$Samp_group) str(dat) # 'data.frame': 160 obs. of 7 variables: # $ mouseID : chr "C1m1" "C1m1" "C1m1" "C1m2" ... # $ samp_id : Factor w/ 160 levels "C1m1Ce","C1m1T01",..: 1 2 3 5 4 6 8 9 7 10 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 1 1 1 1 1 1 1 1 1 1 ... # $ Sex : Ord.factor w/ 2 levels "Female"<"Male": 2 2 2 2 2 2 2 2 2 1 ... # $ samp_type : chr "Caecal" "Faecal w0" "Faecal w7" "Faecal w0" ... # $ Samp_group: chr "Caecal" "Faecal" "Faecal" "Faecal" ... # $ group : chr "Male\nCaecal" "Male\nFaecal" "Male\nFaecal" "Male\nFaecal" ... dim(dat) # 160 7 ## now get individual taxa taxa_names(KAonly.relabun.phy_obj) # [1] "OTU_37" "OTU_60" "OTU_143" "OTU_481" "OTU_531" "OTU_5520" "OTU_5790" "OTU_5971" "OTU_9505" # [10] "OTU_9639" dat$OTU_37 <- NA dat$OTU_60 <- NA dat$OTU_143 <- NA dat$OTU_481 <- NA dat$OTU_531 <- NA dat$OTU_5520 <- NA dat$OTU_5790 <- NA dat$OTU_5971 <- NA dat$OTU_9505 <- NA dat$OTU_9639 <- NA # for (s in 1:length(dat$samp_id)) { # #s<-1 # this_samp <- prune_samples( sample_names(KAonly.relabun.phy_obj) == as.character(dat$samp_id[s]), KAonly.relabun.phy_obj ) # dat[ s , taxa_names(KAonly.relabun.phy_obj) ] <- as.numeric( taxa_sums(this_samp) ) # } KA_taxa_names <- taxa_names(KAonly.relabun.phy_obj) for (s in 1:length(dat$samp_id)) { #s<-1 this_samp <- prune_samples( sample_names(KAonly.relabun.phy_obj) == as.character(dat$samp_id[s]), KAonly.relabun.phy_obj ) # consider only the K. alysoides that increase within each treatment treatment <- as.character( dat$Treatment[s] ) if (treatment == "Control") { KA_list <- df_blast.Cont.all_KA$otu } else if (treatment == "Low") { KA_list <- df_blast.Low.all_KA$otu } else { KA_list <- df_blast.High.all_KA$otu } subsel <- which(KA_taxa_names %in% KA_list) dat[ s , KA_taxa_names[subsel] ] <- as.numeric( taxa_sums(this_samp) )[subsel] # set NA values as zero dat[ s , KA_taxa_names[-subsel] ] <- 0 } dat$Sex_Treatment_Samp_type <- paste0(dat$Sex,"_",dat$Treatment,"_",dat$samp_type) batches <- unique(dat$Sex_Treatment_Samp_type) batches # [1] "Male_Control_Caecal" "Male_Control_Faecal w0" "Male_Control_Faecal w7" "Female_Control_Caecal" # [5] "Female_Control_Faecal w0" "Female_Control_Faecal w7" "Male_High_Caecal" "Male_High_Faecal w7" # [9] "Male_High_Faecal w0" "Female_High_Faecal w0" "Female_High_Faecal w7" "Female_High_Caecal" # [13] "Female_Low_Caecal" "Female_Low_Faecal w0" "Female_Low_Faecal w7" "Male_Low_Caecal" # [17] "Male_Low_Faecal w7" "Male_Low_Faecal w0" ## retrieve data in these batches datnew <- data.frame(otu=NA,Treatment=NA,rel_abun_mean=NA,n_nonzero=NA,n=NA,samp_type=NA,percent_prev=NA,Sex_Treatment_Samp_type=NA,group=NA) names(datnew) # [1] "otu" "Treatment" "rel_abun_mean" "n_nonzero" # [5] "n" "samp_type" "percent_prev" "Sex_Treatment_Samp_type" # [9] "group" for (i in 1:length(batches)) { #i<-1 sel <- which(dat$Sex_Treatment_Samp_type %in% batches[i]) for (z in 1:length( taxa_names(KAonly.relabun.phy_obj) )) { #z<-1 this_otu <- taxa_names(KAonly.relabun.phy_obj)[z] rownew <- data.frame(otu=this_otu,Treatment=NA,rel_abun_mean=NA,n_nonzero=NA,n=NA,samp_type=NA,percent_prev=NA,Sex_Treatment_Samp_type=NA,group=NA) rownew$Treatment <- as.character( unique(dat$Treatment[sel]) ) rownew$rel_abun_mean <- mean(dat[ sel , this_otu ]) rownew$n_nonzero <- length(which( dat[ sel , this_otu ] >0 )) rownew$n <- length( dat[ sel , this_otu ] ) rownew$samp_type <- as.character( unique(dat$samp_type[sel]) ) rownew$percent_prev <- 100*rownew$n_nonzero/rownew$n rownew$Sex_Treatment_Samp_type <- as.character( unique(dat$Sex_Treatment_Samp_type[sel]) ) rownew$group <- as.character( unique(dat$group[sel]) ) datnew <- rbind(datnew, rownew) print(paste0("completed ",z, " of ", length( taxa_names(KAonly.relabun.phy_obj) ))) } print(paste0("------------------------")) print(paste0("completed ",i, " = batch: ", batches[i] )) print(paste0("------------------------")) } # remove NA 1st row datnew <- datnew[-1, ] # log10 mean rel. abun. datnew$log10_mean_rel_abun <- log10(datnew$rel_abun_mean) # lookup pident ? lkup <- rbind(df_blast.Cont.all_KA[ ,c("otu","pident")], df_blast.Low.all_KA[ ,c("otu","pident")], df_blast.High.all_KA[ ,c("otu","pident")]) sel.dup <-which(duplicated(lkup$otu)) lkup <- lkup[-sel.dup, ] temp <- datnew datnew <- temp datnew <- merge(x=datnew,y=lkup,by="otu", all.x=TRUE ) datnew$pident <- as.character(datnew$pident) # remove cases with 0% prevalence sel <- which(datnew$percent_prev==0) datnew <- datnew[-sel, ] str(datnew) # 'data.frame': 72 obs. of 11 variables: # $ otu : chr "OTU_143" "OTU_143" "OTU_143" "OTU_143" ... # $ Treatment : chr "Control" "Control" "Control" "Control" ... # $ rel_abun_mean : num 0.0616 0.00497 0.00171 0.04066 0.03048 ... # $ n_nonzero : int 6 1 2 3 4 6 5 9 9 9 ... # $ n : int 9 9 9 9 9 9 9 9 9 9 ... # $ samp_type : chr "Faecal w7" "Faecal w0" "Faecal w0" "Faecal w7" ... # $ percent_prev : num 66.7 11.1 22.2 33.3 44.4 ... # $ Sex_Treatment_Samp_type: chr "Male_Control_Faecal w7" "Female_Control_Faecal w0" "Male_Control_Faecal w0" "Female_Control_Faecal w7" ... # $ group : chr "Male\nFaecal" "Female\nFaecal" "Male\nFaecal" "Female\nFaecal" ... # $ log10_mean_rel_abun : num -1.21 -2.3 -2.77 -1.39 -1.52 ... # $ pident : chr "95" "95" "95" "95" ... unique(datnew$group) # "Male Faecal" "Female Faecal" "Female Caecal" "Male Caecal" datnew$group <- factor(datnew$group, levels = c( "Female Faecal", "Female Caecal", "Male Faecal", "Male Caecal" ), labels = c( "Female Faeces", "Female Caeca", "Male Faeces", "Male Caeca" ), ordered = TRUE) datnew$link <- paste0(datnew$otu,"_",datnew$Treatment,"_",datnew$group) cols <- c( "Control" ="#f46d43", "Low" ="#66c2a5", "High" ="#5e4fa2" ) shapes <- c( "Faecal w0"=1, "Faecal w7"=16, "Caecal"=17 ) pid_sizes <- c("92"= 1.5, "94"= 2, "95"= 2.5, "96"= 3, "97"= 3.5) str(datnew) temp <- datnew p <- ggplot(data=datnew, aes(x=log10_mean_rel_abun, y = percent_prev, color = Treatment, shape = samp_type)) + geom_point(aes(size = pident)) + geom_line(aes(group=link)) + scale_shape_manual(values = shapes, name = "Sample\ntype") + scale_size_manual(values = pid_sizes, name = "Percent\nidentity") + scale_color_manual(values = cols) + theme_classic() + labs(x = "log10(mean % OTU rel. abun.)", y = "Prevalence (%)" ) + facet_wrap(~group) + guides(colour = guide_legend(order = 1), shape = guide_legend(order = 2), size = guide_legend(order = 3)) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.8)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.x = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 5,l = 0,"pt"), size = rel(0.8)), legend.key.size = unit(2, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), legend.box.spacing = unit(2, "pt"), legend.box.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.spacing = unit(4, "pt") ) p ggsave(plot=p, filename = paste0("plots/","All_KA_pc_prevalence-vs-log10-mean-pc-OTU-rel-abun-Male-Female-Faecal-Caecal-vFINAL.tiff"), width = 10, height = 10, units = "cm", dpi = 600, compression = "lzw") #------------------------ #### Composition Relative abundance bar plots #------------------------ # compute % OTU Relative Abundance for: soil.dc.16s air.dc.16s fecal.w1w7.dc.16s cecal.dc.16s ### Soils relabun.soil.dc.16s <- transform_sample_counts(soil.dc.16s, function(x) 100*x / sum(x) ) relabun.soil.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2345 taxa and 65 samples ] # sample_data() Sample Data: [ 65 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2345 taxa by 7 taxonomic ranks ] # agglomerate at level of Phylum relabun.soil.dc.16s <- tax_glom(physeq = relabun.soil.dc.16s, taxrank = "Phylum") relabun.soil.dc.16s@sam_data$sample relabun.soil.dc.16s@sam_data$Treatment # compare only homogenised treatment soils sel <- grep(pattern = "Clr|Rem", x = soil.dc.16s@sam_data$sample) soil.dc.16s@sam_data$sample[sel] soilw1w7 <- soil.dc.16s@sam_data$sample[-sel] soilw1w7 relabun.soilw1w7.dc.16s <- prune_samples( sample_names(relabun.soil.dc.16s) %in% soilw1w7, relabun.soil.dc.16s ) relabun.soilw1w7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 24 taxa and 47 samples ] # sample_data() Sample Data: [ 47 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 24 taxa by 7 taxonomic ranks ] p.soil<-plot_bar( relabun.soilw1w7.dc.16s , x = "sample", fill = "Phylum", facet_grid = ~Treatment + Time ) # Identify which Phyla are rare? hist(p.soil$data$Abundance) levels(as.factor(as.character( p.soil$data$Phylum ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Armatimonadetes" "p__Bacteroidetes" # [6] "p__BHI80-139" "p__Chlamydiae" "p__Chlorobi" "p__Chloroflexi" "p__Cyanobacteria" # [11] "p__Elusimicrobia" "p__Fibrobacteres" "p__Firmicutes" "p__Gemmatimonadetes" "p__GN02" # [16] "p__Nitrospirae" "p__OD1" "p__Planctomycetes" "p__Proteobacteria" "p__Tenericutes" # [21] "p__TM6" "p__TM7" "p__Verrucomicrobia" "p__WPS-2" levels(as.factor(as.character( p.soil$data$Phylum[which(p.soil$data$Abundance >= 0.5)] ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Bacteroidetes" "p__Chloroflexi" # [6] "p__Firmicutes" "p__Gemmatimonadetes" "p__Planctomycetes" "p__Proteobacteria" "p__Verrucomicrobia" # [11] "p__WPS-2" pp <- p.soil + facet_grid(~Treatment+Time, scales="free_x") # , space="free_x" pp ## only display phyla >= 0.5% major_phyla <- levels(as.factor(as.character( p.soil$data$Phylum[which(p.soil$data$Abundance >= 0.5)] ))) sel.row <- which(p.soil$data$Phylum %in% major_phyla) 100*sum( p.soil$data$Abundance[sel.row] )/sum(p.soil$data$Abundance) # 99.66032 % of relative abundance is covered 100 - 99.66032 # 0.33968 % left representing rare phyla p.soil.plot <- p.soil class(p.soil.plot$data$Phylum) # "factor" p.soil.plot$data$Phylum <- as.character(p.soil.plot$data$Phylum) p.soil.plot$data$Phylum[-sel.row] <- "Other minor phyla" levels(factor(p.soil.plot$data$Phylum)) # [1] "Other minor phyla" "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Bacteroidetes" # [6] "p__Chloroflexi" "p__Firmicutes" "p__Gemmatimonadetes" "p__Planctomycetes" "p__Proteobacteria" # [11] "p__Verrucomicrobia" "p__WPS-2" p.soil.plot$data$Phylum <- factor(p.soil.plot$data$Phylum, levels = c( "p__Acidobacteria" , "p__Actinobacteria" , "p__AD3" , "p__Bacteroidetes" , "p__Chloroflexi" , "p__Firmicutes" , "p__Gemmatimonadetes", "p__Planctomycetes" , "p__Proteobacteria" , "p__Verrucomicrobia" , "p__WPS-2" , "Other minor phyla"), labels = c( "Acidobacteria" , "Actinobacteria" , "AD3" , "Bacteroidetes" , "Chloroflexi" , "Firmicutes" , "Gemmatimonadetes", "Planctomycetes" , "Proteobacteria" , "Verrucomicrobia" , "WPS-2" , "Other minor phyla" ),ordered = TRUE ) p.soil.plot$data$Treatment <- factor(p.soil.plot$data$Treatment, levels = c("Low","High"),ordered = TRUE) pp <- p.soil.plot + theme_classic() + facet_grid(~Treatment+Time, scales="free_x", margins = unit(2,"pt")) + scale_fill_manual(values = cols.phlya.16S) + ylab("OTU relative abundance (%)") + xlab(NULL) + geom_bar(stat = "identity", size=0.5) + scale_y_continuous(expand = expand_scale(mult = .01)) + # default .05 scale_x_discrete(expand = expand_scale(add = .1)) + # default .6 theme( rect = element_rect(size = 0.5), plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.text.x = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), angle=90,size = rel(0.7), vjust=0.5 ), # , hjust=0.5, axis.text.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 2,l = 0,"pt"), vjust = 0, size = rel(0.7)), axis.line.x.bottom = element_blank(), axis.line.y.left = element_blank(), axis.ticks.x.bottom = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) pp$data$sample <- substring(pp$data$Sample, first =1, last =4) pp grid.text(label = "A", x = unit(0.015, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Phylum-barplots-Soils-vFINAL.tiff"), width = 18, height = 5.8, units = "cm", res=600, compression="lzw") ### Air relabun.air.dc.16s <- transform_sample_counts(air.dc.16s, function(x) 100*x / sum(x) ) relabun.air.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 2073 taxa and 72 samples ] # sample_data() Sample Data: [ 72 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 2073 taxa by 7 taxonomic ranks ] # agglomerate at level of Phylum relabun.air.dc.16s <- tax_glom(physeq = relabun.air.dc.16s, taxrank = "Phylum") relabun.air.dc.16s@sam_data$sample relabun.air.dc.16s@sam_data$Treatment p.air<-plot_bar( relabun.air.dc.16s , x = "sample", fill = "Phylum", facet_grid = ~Treatment + Time ) # Identify which Phyla are rare? hist(p.air$data$Abundance) levels(as.factor(as.character( p.air$data$Phylum ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Armatimonadetes" "p__Bacteroidetes" # [6] "p__BHI80-139" "p__Chlamydiae" "p__Chlorobi" "p__Chloroflexi" "p__Cyanobacteria" # [11] "p__Deferribacteres" "p__Elusimicrobia" "p__Fibrobacteres" "p__Firmicutes" "p__Gemmatimonadetes" # [16] "p__GN02" "p__Nitrospirae" "p__OD1" "p__Planctomycetes" "p__Proteobacteria" # [21] "p__Tenericutes" "p__TM6" "p__TM7" "p__Verrucomicrobia" "p__WPS-2" levels(as.factor(as.character( p.air$data$Phylum[which(p.air$data$Abundance >= 0.5)] ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Bacteroidetes" "p__Chloroflexi" # [6] "p__Cyanobacteria" "p__Deferribacteres" "p__Fibrobacteres" "p__Firmicutes" "p__Gemmatimonadetes" # [11] "p__GN02" "p__Planctomycetes" "p__Proteobacteria" "p__Tenericutes" "p__TM6" # [16] "p__TM7" "p__Verrucomicrobia" "p__WPS-2" pp <- p.air + facet_grid(~Treatment+Time, scales="free_x") # , space="free_x" pp ## only display phyla >= 0.5% major_phyla <- levels(as.factor(as.character( p.air$data$Phylum[which(p.air$data$Abundance >= 0.5)] ))) sel.row <- which(p.air$data$Phylum %in% major_phyla) 100*sum( p.air$data$Abundance[sel.row] )/sum(p.air$data$Abundance) # 99.89774 % of relative abundance is covered 100 - 99.89774 # 0.10226 % left representing rare phyla p.air.plot <- p.air class(p.air.plot$data$Phylum) # "factor" p.air.plot$data$Phylum <- as.character(p.air.plot$data$Phylum) p.air.plot$data$Phylum[-sel.row] <- "Other minor phyla" levels(factor(p.air.plot$data$Phylum)) # [1] "Other minor phyla" "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Bacteroidetes" # [6] "p__Chloroflexi" "p__Cyanobacteria" "p__Deferribacteres" "p__Fibrobacteres" "p__Firmicutes" # [11] "p__Gemmatimonadetes" "p__GN02" "p__Planctomycetes" "p__Proteobacteria" "p__Tenericutes" # [16] "p__TM6" "p__TM7" "p__Verrucomicrobia" "p__WPS-2" p.air.plot$data$Phylum <- factor(p.air.plot$data$Phylum, levels = c( "p__Acidobacteria" , "p__Actinobacteria", "p__AD3" , "p__Bacteroidetes" , "p__Chloroflexi" , "p__Cyanobacteria" , "p__Deferribacteres", "p__Fibrobacteres" , "p__Firmicutes" , "p__Gemmatimonadetes", "p__GN02" , "p__Planctomycetes" , "p__Proteobacteria" ,"p__Tenericutes" , "p__TM6" ,"p__TM7" ,"p__Verrucomicrobia" ,"p__WPS-2", "Other minor phyla"), labels = c( "Acidobacteria" , "Actinobacteria", "AD3" , "Bacteroidetes" , "Chloroflexi" , "Cyanobacteria" , "Deferribacteres", "Fibrobacteres" , "Firmicutes" , "Gemmatimonadetes", "GN02" , "Planctomycetes" , "Proteobacteria" ,"Tenericutes" , "TM6" ,"TM7" ,"Verrucomicrobia" ,"WPS-2", "Other minor phyla" ),ordered = TRUE ) p.air.plot$data$Treatment <- factor(p.air.plot$data$Treatment, levels = c("Control","Low","High"),ordered = TRUE) pp <- p.air.plot + theme_classic() + facet_grid(~Treatment+Time, scales="free_x", margins = unit(2,"pt")) + scale_fill_manual(values = cols.phlya.16S) + ylab("OTU relative abundance (%)") + xlab(NULL) + geom_bar(stat = "identity", size=0.5) + scale_y_continuous(expand = expand_scale(mult = .01)) + # default .05 scale_x_discrete(expand = expand_scale(add = .1)) + # default .6 theme( rect = element_rect(size = 0.5), plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.text.x = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), angle=90,size = rel(0.7), vjust=0.5 ), # , hjust=0.5, axis.text.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 2,l = 0,"pt"), vjust = 0, size = rel(0.7)), axis.line.x.bottom = element_blank(), axis.line.y.left = element_blank(), axis.ticks.x.bottom = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) pp$data$sample <- substring(pp$data$Sample, first =1, last =4) pp grid.text(label = "B", x = unit(0.015, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Phylum-barplots-Air-B-vFINAL.tiff"), width = 18, height = 5.8, units = "cm", res=600, compression="lzw") ### Fecal relabun.fecal.w0w7.dc.16s <- transform_sample_counts(fecal.w0w7.dc.16s, function(x) 100*x / sum(x) ) relabun.fecal.w0w7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 107 samples ] # sample_data() Sample Data: [ 107 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] # agglomerate at level of Phylum relabun.fecal.w0w7.dc.16s <- tax_glom(physeq = relabun.fecal.w0w7.dc.16s, taxrank = "Phylum") relabun.fecal.w0w7.dc.16s@sam_data$sample relabun.fecal.w0w7.dc.16s@sam_data$Treatment p.fecal<-plot_bar( relabun.fecal.w0w7.dc.16s , x = "sample", fill = "Phylum", facet_grid = ~Treatment ) # Identify which Phyla are rare? hist(p.fecal$data$Abundance) levels(as.factor(as.character( p.fecal$data$Phylum ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__Bacteroidetes" "p__Chloroflexi" "p__Cyanobacteria" # [6] "p__Deferribacteres" "p__Elusimicrobia" "p__Fibrobacteres" "p__Firmicutes" "p__Gemmatimonadetes" # [11] "p__OD1" "p__Planctomycetes" "p__Proteobacteria" "p__Tenericutes" "p__TM7" # [16] "p__Verrucomicrobia" "p__WPS-2" levels(as.factor(as.character( p.fecal$data$Phylum[which(p.fecal$data$Abundance >= 0.5)] ))) # [1] "p__Actinobacteria" "p__Bacteroidetes" "p__Deferribacteres" "p__Firmicutes" "p__Proteobacteria" pp <- p.fecal + facet_grid(~Treatment + Time, scales="free_x") # , space="free_x" pp ## only display phyla >= 0.5% major_phyla <- levels(as.factor(as.character( p.fecal$data$Phylum[which(p.fecal$data$Abundance >= 0.5)] ))) sel.row <- which(p.fecal$data$Phylum %in% major_phyla) 100*sum( p.fecal$data$Abundance[sel.row] )/sum(p.fecal$data$Abundance) # 99.98005 % of relative abundance is covered 100 - 99.98005 # 0.01995 % left representing rare phyla p.fecal.plot <- p.fecal class(p.fecal.plot$data$Phylum) # "factor" p.fecal.plot$data$Phylum <- as.character(p.fecal.plot$data$Phylum) p.fecal.plot$data$Phylum[-sel.row] <- "Other minor phyla" levels(factor(p.fecal.plot$data$Phylum)) # [1] "Other minor phyla" "p__Actinobacteria" "p__Bacteroidetes" "p__Deferribacteres" "p__Firmicutes" # [6] "p__Proteobacteria" p.fecal.plot$data$Phylum <- factor(p.fecal.plot$data$Phylum, levels = c( "p__Actinobacteria", "p__Bacteroidetes" , "p__Deferribacteres" , "p__Firmicutes" , "p__Proteobacteria" , "Other minor phyla"), labels = c( "Actinobacteria", "Bacteroidetes" , "Deferribacteres" , "Firmicutes" , "Proteobacteria" , "Other minor phyla" ),ordered = TRUE ) p.fecal.plot$data$Treatment <- factor(p.fecal.plot$data$Treatment, levels = c("Control","Low","High"),ordered = TRUE) pp <- p.fecal.plot + theme_classic() + facet_grid(~Treatment+Time, scales="free_x", margins = unit(2,"pt")) + scale_fill_manual(values = cols.phlya.16S) + ylab("OTU relative abundance (%)") + xlab(NULL) + geom_bar(stat = "identity", size=0.5) + scale_y_continuous(expand = expand_scale(mult = .01)) + # default .05 scale_x_discrete(expand = expand_scale(add = .1)) + # default .6 theme( rect = element_rect(size = 0.5), plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.text.x = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), angle=90,size = rel(0.6), vjust=0.5), # , hjust=0.5, axis.text.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 2,l = 0,"pt"), vjust = 0, size = rel(0.7)), axis.line.x.bottom = element_blank(), axis.line.y.left = element_blank(), axis.ticks.x.bottom = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) pp$data$sample <- substring(pp$data$Sample, first =1, last =4) pp grid.text(label = "C", x = unit(0.015, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Phylum-barplots-Fecal-C-vFINAL.tiff"), width = 18, height = 5.8, units = "cm", res=600, compression="lzw") ### Cecal relabun.cecal.dc.16s <- transform_sample_counts(cecal.dc.16s, function(x) 100*x / sum(x) ) relabun.cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 550 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 550 taxa by 7 taxonomic ranks ] # agglomerate at level of Phylum relabun.cecal.dc.16s <- tax_glom(physeq = relabun.cecal.dc.16s, taxrank = "Phylum") relabun.cecal.dc.16s@sam_data$sample relabun.cecal.dc.16s@sam_data$Treatment p.cecal<-plot_bar( relabun.cecal.dc.16s , x = "sample", fill = "Phylum", facet_grid = ~Treatment ) # Identify which Phyla are rare? hist(p.cecal$data$Abundance) levels(as.factor(as.character( p.cecal$data$Phylum ))) # [1] "p__Acidobacteria" "p__Actinobacteria" "p__Bacteroidetes" "p__Chloroflexi" "p__Cyanobacteria" # [6] "p__Deferribacteres" "p__Firmicutes" "p__Proteobacteria" "p__Tenericutes" "p__TM7" # [11] "p__Verrucomicrobia" levels(as.factor(as.character( p.cecal$data$Phylum[which(p.cecal$data$Abundance >= 0.5)] ))) #[1] "p__Actinobacteria" "p__Bacteroidetes" "p__Firmicutes" "p__Proteobacteria" "p__TM7" pp <- p.cecal + facet_grid(~Treatment, scales="free_x") # , space="free_x" pp ## only display phyla >= 0.5% major_phyla <- levels(as.factor(as.character( p.cecal$data$Phylum[which(p.cecal$data$Abundance >= 0.5)] ))) sel.row <- which(p.cecal$data$Phylum %in% major_phyla) 100*sum( p.cecal$data$Abundance[sel.row] )/sum(p.cecal$data$Abundance) # 99.9694 % of relative abundance is covered 100 - 99.9694 # 0.0306 % left representing rare phyla p.cecal.plot <- p.cecal class(p.cecal.plot$data$Phylum) # "factor" p.cecal.plot$data$Phylum <- as.character(p.cecal.plot$data$Phylum) p.cecal.plot$data$Phylum[-sel.row] <- "Other minor phyla" levels(factor(p.cecal.plot$data$Phylum)) # [1] "Other minor phyla" "p__Actinobacteria" "p__Bacteroidetes" "p__Firmicutes" "p__Proteobacteria" # [6] "p__TM7" p.cecal.plot$data$Phylum <- factor(p.cecal.plot$data$Phylum, levels = c( "p__Actinobacteria", "p__Bacteroidetes", "p__Firmicutes", "p__Proteobacteria", "p__TM7" , "Other minor phyla"), labels = c( "Actinobacteria", "Bacteroidetes", "Firmicutes", "Proteobacteria", "TM7" , "Other minor phyla" ),ordered = TRUE ) #p.cecal.plot$data$Treatment <- factor(p.cecal.plot$data$Treatment, levels = c("Control","Low","High"),ordered = TRUE) p.cecal.plot$data$Treatment <- factor(p.cecal.plot$data$Treatment, levels = c("Control","Low","High"), labels = c("Control\n","Low\n","High\n"),ordered = TRUE) pp <- p.cecal.plot + theme_classic() + facet_grid(~Treatment, scales="free_x", margins = unit(2,"pt")) + scale_fill_manual(values = cols.phlya.16S) + ylab("OTU relative abundance (%)") + xlab(NULL) + geom_bar(stat = "identity", size=0.5) + scale_y_continuous(expand = expand_scale(mult = .01)) + # default .05 scale_x_discrete(expand = expand_scale(add = .1)) + # default .6 theme( rect = element_rect(size = 0.5), plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.text.x = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), angle=90,size = rel(0.7), vjust=0.5 ), # , hjust=0.5, axis.text.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), axis.title.y = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 0,r = 0,b = 2,l = 0,"pt"), vjust = 0, size = rel(0.7)), axis.line.x.bottom = element_blank(), axis.line.y.left = element_blank(), axis.ticks.x.bottom = element_blank(), legend.box.margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.key.size = unit(5, "pt"), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.5)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.7)), legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.box.spacing = unit(5, "pt") ) pp$data$sample <- substring(pp$data$Sample, first =1, last =4) pp grid.text(label = "D", x = unit(0.015, "npc") , y = unit(0.96,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Phylum-barplots-Cecal-D-vFINAL.tiff"), width = 18, height = 5.8, units = "cm", res=600, compression="lzw") ### for plotting purposes all_major_phyla <- unique( c( levels(as.factor(as.character( p.soil$data$Phylum[which(p.soil$data$Abundance >= 0.5)] ))), levels(as.factor(as.character( p.air$data$Phylum[which(p.air$data$Abundance >= 0.5)] ))), levels(as.factor(as.character( p.fecal$data$Phylum[which(p.fecal$data$Abundance >= 0.5)] ))), levels(as.factor(as.character( p.cecal$data$Phylum[which(p.cecal$data$Abundance >= 0.5)] ))) )) length(all_major_phyla) # 18 all_major_phyla # [1] "p__Acidobacteria" "p__Actinobacteria" "p__AD3" "p__Bacteroidetes" "p__Chloroflexi" # [6] "p__Firmicutes" "p__Gemmatimonadetes" "p__Planctomycetes" "p__Proteobacteria" "p__Verrucomicrobia" # [11] "p__WPS-2" "p__Cyanobacteria" "p__Deferribacteres" "p__Fibrobacteres" "p__GN02" # [16] "p__Tenericutes" "p__TM6" "p__TM7" cols.phlya.16S <-c( # https://sashat.me/2017/01/11/list-of-20-simple-distinct-colors/ "Acidobacteria" ='#e6194b' , "Actinobacteria"='#3cb44b' , "AD3"='#bcf60c' , "Bacteroidetes" ='#4363d8', "Chloroflexi"= '#ffe119', "Firmicutes"= '#fabebe', "Gemmatimonadetes"='#46f0f0' , "Planctomycetes"='#f032e6' , "Proteobacteria" ='#f58231', "Verrucomicrobia" ='#911eb4', "WPS-2" ='#008080', "Cyanobacteria"='#9a6324' , "Deferribacteres"='#fffac8' , "Fibrobacteres"='#800000' , "GN02"='#aaffc3' , "Tenericutes"='#808000' , "TM6" ='#ffd8b1', "TM7"='#000075', "Other minor phyla" = '#a9a9a9' ) #------------------------ #### Fecal alpha diversity comparisons #### Includes comparison by Sex #### Fecal Week 0, Week 7, Difference between Week 7-Week 0 #### Keep all samples except when tracking individual change Week 0 > Week 7 #------------------------ fecal.w0w7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 107 samples ] # sample_data() Sample Data: [ 107 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] table(fecal.w0w7.dc.16s@sam_data$Time) # Week 0 Week 7 # 54 53 table(fecal.w0w7.dc.16s@sam_data$mouseID) # C1m1 C1m2 C1m3 C2m1 C2m2 C2m3 C3m1 C3m2 C3m3 C4m1 C4m2 C4m3 C5m1 C5m2 C5m3 C6m1 C6m2 C6m3 H1m1 H1m2 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 # H1m3 H2m1 H2m2 H2m3 H3m1 H3m2 H3m3 H4m1 H4m2 H4m3 H5m1 H5m2 H5m3 H6m1 H6m2 H6m3 L1m1 L1m2 L1m3 L2m1 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 # L2m2 L2m3 L3m1 L3m2 L3m3 L4m1 L4m2 L4m3 L5m1 L5m2 L5m3 L6m1 L6m2 L6m3 # 2 2 2 2 2 2 2 2 2 2 2 2 2 2 phy_obj <- fecal.w0w7.dc.16s phy_obj # phyloseq-class experiment-level object # otu_table() OTU Table: [ 800 taxa and 107 samples ] # sample_data() Sample Data: [ 107 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 800 taxa by 7 taxonomic ranks ] min(sample_sums(phy_obj)) # 11450 min(taxa_sums(phy_obj)) # 1 table(phy_obj@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 6 6 6 6 6 6 6 6 6 6 6 6 5 6 6 6 6 6 table(phy_obj@sam_data$Treatment) # Control High Low # 36 36 35 table(phy_obj@sam_data$Time) # Week 0 Week 7 # 54 53 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.phy_obj <- rarefy_even_depth(phy_obj, sample.size = min(sample_sums(phy_obj)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.phy_obj) # all 11450 ntaxa(rare.phy_obj) # 658 nsamples(rare.phy_obj) # 107 shan <- plot_richness(rare.phy_obj, measures=c("Shannon")) str(shan) out <- data.frame(sample=shan$data$sample, samp_type=shan$data$samp_type, mouseID=shan$data$mouseID, Treatment=shan$data$Treatment, Time=shan$data$Time, Sex=shan$data$Sex, shanH=shan$data$value) out$eff_no <- exp(out$shanH) str(out) out$Treatment <- factor(out$Treatment, levels = c("Control", "Low", "High"), ordered = TRUE) out$Sex <- factor(out$Sex, levels = c("female","male"), labels = c("Female","Male")) out$mouseID <- as.character(out$mouseID) str(out) out$Time <- factor(out$Time, levels=c("Week 0","Week 7"), labels = c("Week 0","Week 7")) p <- ggplot(data=out, aes(x = Treatment, y = eff_no)) + geom_boxplot() + facet_grid(~Time + Sex) p ## tidy plot # # # # # # # # # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue set.seed(123) # don't colour outliers bp <- boxplot(out$eff_no ~ out$Treatment + out$Time + out$Sex) bp$out # 23.31338 43.85028 15.94216 20.03916 16.81255 sel <- which(out$eff_no %in% bp$out) out$eff_no[sel] # 23.31338 16.81255 43.85028 15.94216 20.03916 out[sel,] # sample samp_type mouseID Treatment Time Sex shanH eff_no # 15 C5m3T01 fecal C5m3 Control Week 1 Female 3.149027 23.31338 # 27 L4m2T01 fecal L4m2 Low Week 1 Male 2.822126 16.81255 # 42 C4m1T01 fecal C4m1 Control Week 1 Female 3.780781 43.85028 # 44 H4m3T01 fecal H4m3 High Week 1 Female 2.768967 15.94216 # 49 H4m3T16 fecal H4m3 High Week 7 Female 2.997688 20.03916 ## exclude outliers from Kruskal-Wallis significance testing out_exout <- out[-sel, ] sel <- which(out_exout$Time=="Week 0") wk0 <- out_exout[sel, ] sel <- which(out_exout$Time=="Week 7") wk7 <- out_exout[sel, ] # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk0) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 1.3478, df = 2, p-value = 0.5097 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk7) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 2.311, df = 2, p-value = 0.3149 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk0[which(wk0$Sex=="Female"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 3.3884, df = 2, p-value = 0.1837 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk0[which(wk0$Sex=="Male"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 4.0085, df = 2, p-value = 0.1348 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk7[which(wk7$Sex=="Female"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 5.5777, df = 2, p-value = 0.06149 table( wk7[which(wk7$Sex=="Female"), ]$Treatment ) # Control Low High # 9 8 8 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, wk7[which(wk7$Sex=="Male"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 0.20106, df = 2, p-value = 0.9044 ## add geom_signif() ## see this example - https://www.rdocumentation.org/packages/ggsignif/versions/0.5.0 annotation_df <- data.frame( Time = c("Week 7"), Sex = c("Female"), start=c("Control"), end = c("High"), y = c(61), label = c("P = 0.06")) set.seed(123) p <- ggplot(data=out, aes(x = Treatment, y = eff_no)) + geom_boxplot(outlier.size = 1) + geom_signif(data=annotation_df, aes(xmin=start, xmax=end, annotations=label, y_position=y), textsize = 2.5, vjust = -0.2, tip_length = 0, margin_top = 0, manual=TRUE) + facet_grid(~Time+Sex) + geom_jitter(data = out_exout, aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Effective OTUs (count)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 2,r = 2,b = 2,l = 2,"pt")) ) p dev.print(tiff, file = paste0("plots/","Alpha-diversity-Fecal-Week0-Week7-bySex-vFINAL.tiff"), width = 14, height = 6, units = "cm", res=600, compression="lzw") # # # # # # # # # ## calculate difference in alpha diversity: Week 0 > Week 7 ## Need to identify which mouse cannot be traced from Week 0 > Week 7 # note from above, mouseID: L1m2 only has one entry sel <- which(out$mouseID == "L1m2") # 57 so exclude this when examine differences diff <- data.frame(mouseID = unique(as.character(out$mouseID[-sel])), w0 = NA, w7 = NA, diff_eff_no = NA, Treatment = NA, Sex=NA, stringsAsFactors = FALSE) str(diff) for (m in 1:length(diff$mouseID)) { # m<-1 sel.out.w0 <- which(out$mouseID == diff$mouseID[m] & out$Time == "Week 0") sel.out.w7 <- which(out$mouseID == diff$mouseID[m] & out$Time == "Week 7") diff$w0[m] <- out$eff_no[sel.out.w0] diff$w7[m] <- out$eff_no[sel.out.w7] diff$diff_eff_no[m] <- out$eff_no[sel.out.w7] - out$eff_no[sel.out.w0] diff$Treatment[m] <- unique( c( as.character(out$Treatment[sel.out.w7]), as.character(out$Treatment[sel.out.w0]) )) diff$Sex[m] <- unique( c( as.character(out$Sex[sel.out.w7]), as.character(out$Sex[sel.out.w0]) )) print(paste0("calculating mouse # ", m, " = ",diff$mouseID[m], ": ",out$mouseID[sel.out.w7]," : ", out$mouseID[sel.out.w0])) } str(diff) diff$Treatment <- factor(diff$Treatment, levels = c("Control", "Low", "High"), ordered = TRUE) diff$Sex <- factor(diff$Sex) str(diff) # 'data.frame': 53 obs. of 6 variables: # $ mouseID : chr "L3m3" "L1m3" "L5m3" "H3m1" ... # $ w1 : num 39.1 31.3 32.6 40.8 16.2 ... # $ w7 : num 47.8 55.1 26 54.7 57.6 ... # $ diff_eff_no: num 8.69 23.85 -6.6 13.89 41.38 ... # $ Treatment : Ord.factor w/ 3 levels "Control"<"Low"<..: 2 2 2 3 3 3 3 1 1 3 ... # $ Sex : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 2 2 2 1 2 ... diff p <- ggplot(data=diff, aes(x = Treatment, y = diff_eff_no)) + geom_boxplot() + facet_grid(~Sex) p ## tidy plot # # # # # # # # # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue set.seed(123) # don't colour outliers bp <- boxplot(diff$diff_eff_no ~ diff$Treatment + diff$Sex) bp$out # 41.18108 56.73688 sel <- which(diff$diff_eff_no %in% bp$out) diff$diff_eff_no[sel] # 41.18108 56.73688 diff[sel,] # mouseID w1 w7 diff_eff_no Treatment Sex # 8 C1m3 12.473278 53.65436 41.181084388 Control Male # 39 C1m2 4.414451 59.94410 55.529652716 Control Male diff_exout <- diff[-sel, ] # Kruskal-Wallis test kt <- kruskal.test( diff_eff_no ~ Treatment, diff_exout[which(diff_exout$Sex=="Female"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: diff_eff_no by Treatment # Kruskal-Wallis chi-squared = 1.382, df = 2, p-value = 0.5011 # Kruskal-Wallis test kt <- kruskal.test( diff_eff_no ~ Treatment, diff_exout[which(diff_exout$Sex=="Male"), ]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: diff_eff_no by Treatment # Kruskal-Wallis chi-squared = 1.1499, df = 2, p-value = 0.5627 set.seed(123) p <- ggplot(data=diff, aes(x = Treatment, y = diff_eff_no)) + geom_boxplot(outlier.size = 1) + geom_jitter(data = diff[-sel, ], aes(colour = Treatment), width=0.3, height=0, size=1) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + labs(y="Change in effective OTUs (count)", x=NULL) + geom_hline(yintercept = 0, col="red", linetype="dashed") + facet_grid(~Sex) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank") ) p dev.print(tiff, file = paste0("plots/","Alpha-diversity-Fecal-Difference-Week0-to-Week7-bySex-vFINAL.tiff"), width = 8.3, height = 6, units = "cm", res=600, compression="lzw") #------------------------ #### Cecal only - alpha diversity comparisons - Rarefied #### Includes comparison by Sex #------------------------ cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 550 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 550 taxa by 7 taxonomic ranks ] table(cecal.dc.16s@sam_data$Cage.Name) # C1 C2 C3 C4 C5 C6 H1 H2 H3 H4 H5 H6 L1 L2 L3 L4 L5 L6 # 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 table( cecal.dc.16s@sam_data$Treatment ) # Control High Low # 18 18 17 table( cecal.dc.16s@sam_data$Time ) # Post-exposure # 53 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.cecal.dc.16s <- rarefy_even_depth(cecal.dc.16s, sample.size = min(sample_sums(cecal.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.cecal.dc.16s) # all 14632 ntaxa(rare.cecal.dc.16s) # 496 nsamples(rare.cecal.dc.16s) # 53 shan <- plot_richness(rare.cecal.dc.16s, measures=c("Shannon")) str(shan) out <- data.frame(sample=shan$data$sample, samp_type=shan$data$samp_type, mouseID=shan$data$mouseID, Treatment=shan$data$Treatment, Time=shan$data$Time, Sex=shan$data$Sex, shanH=shan$data$value) out$eff_no <- exp(out$shanH) str(out) out$Treatment <- factor(out$Treatment, levels = c("Control", "Low", "High"), ordered = TRUE) out$Sex <- factor(out$Sex, levels = c("female", "male"), labels=c("Female","Male")) p <- ggplot(data=out, aes(x = Treatment, y = eff_no)) + geom_boxplot() + facet_grid(~Sex) p # # # # # # # # # # # # # ## exclude outlier for significance test bp <- boxplot(out$eff_no ~ out$Treatment + out$Sex) bp$out # 15.73833 sel <- which(out$eff_no %in% bp$out) out$eff_no[sel] # 15.73833 out_exout<-out[-sel,] # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, out_exout[which(out_exout$Sex=="Female") ,]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 5.2925, df = 2, p-value = 0.07092 table( out_exout[which(out_exout$Sex=="Female") ,]$Treatment ) # Control Low High # 8 9 9 # Kruskal-Wallis test kt <- kruskal.test( eff_no ~ Treatment, out_exout[which(out_exout$Sex=="Male") ,]) # Kruskal Wallis test kt # Kruskal-Wallis rank sum test # data: eff_no by Treatment # Kruskal-Wallis chi-squared = 2.4444, df = 2, p-value = 0.2946 ## tidy plot # # # # # # # # # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" ) # orange, aqua, dark-blue annotation_df <- data.frame( Sex = c("Female"), start=c("Control"), end = c("High"), y = c(43), label = c("P = 0.07")) set.seed(4321) # for jitter p <- ggplot(data=out, aes(x = Treatment, y = eff_no)) + geom_boxplot(outlier.size = 1) + geom_signif(data=annotation_df, aes(xmin=start, xmax=end, annotations=label, y_position=y), textsize = 2.5, vjust = -0.2, tip_length = 0, margin_top = 0, manual=TRUE) + geom_jitter(data = out_exout, aes(colour = Treatment), width=0.3, height=0, size =1 ) + scale_color_manual(values = cols, guide = FALSE) + theme_classic() + facet_grid(~Sex) + labs(y="Effective OTUs (count)", x=NULL) + theme( plot.margin = margin(t = 2, r = 2, b = 2, l = 5, "pt"), axis.ticks.length=unit(-3, "pt"), axis.ticks.x.bottom = element_blank(), axis.text.x = element_text(margin=margin(t = 5,r = 5,b = 2,l = 5,"pt"), size = rel(0.9)), # , size = rel(0.9)) axis.text.y = element_text(margin=margin(t = 5,r = 5,b = 5,l = 2,"pt"), size = rel(0.8)), axis.title.y = element_text(margin=margin(t = 5,r = 2,b = 5,l = 2,"pt"), size = rel(0.8)), strip.background = element_rect(fill="white", linetype = "blank"), strip.text = element_text(margin=margin(t = 2,r = 2,b = 2,l = 2,"pt")) ) #+ p ggsave(plot=p, filename = paste0("plots/","Alpha-diversity-Cecal-vFINAL.tiff"), width = 8.3, height = 6, units = "cm", dpi = 600, compression = "lzw") # # # # # # # # # # # # # #------------------------ #### Compare FECAL microbiota ordination and composition of #### Top third vs. lowest third Open Field centre time - FEMALE only #### Ordination #------------------------ fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 645 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 645 taxa by 7 taxonomic ranks ] str(of.post) sel <- which(of.post.COPY$Sex=="Female") beh_fem <- of.post.COPY[sel, ] quantile(beh_fem$Centre_time, probs = c(1/3, 2/3) ) # 33.33333% 66.66667% # 26.56667 51.76667 beh_fem$group <- NA sel <- which(beh_fem$Centre_time <= quantile(beh_fem$Centre_time, probs = c(1/3) )) beh_fem$group[sel] <- "anxious" sel <- which(beh_fem$Centre_time >= quantile(beh_fem$Centre_time, probs = c(2/3) )) beh_fem$group[sel] <- "less anxious" # remove middle third ok <- complete.cases(beh_fem) sel <- which(ok==TRUE) beh_fem_groups <- beh_fem[sel, ] dim(beh_fem_groups) # 18 39 beh_fem_groups[ ,c("ID","Treatment","Centre_time","group")] # ID Treatment Centre_time group # 4 C2m1 Control 10.4 anxious # 5 C2m2 Control 162.5 less anxious # 6 C2m3 Control 88.7 less anxious # 10 C4m1 Control 12.1 anxious # 11 C4m2 Control 3.3 anxious # 13 C5m1 Control 10.6 anxious # 15 C5m3 Control 25.5 anxious # 27 H3m3 High 61.4 less anxious # 28 H4m1 High 22.6 anxious # 29 H4m2 High 90.0 less anxious # 34 H6m1 High 97.6 less anxious # 36 H6m3 High 57.7 less anxious # 38 L1m2 Low 82.5 less anxious # note: week 7 fecal sample for mouse L1m2 is not available # 43 L3m1 Low 13.0 anxious # 44 L3m2 Low 126.8 less anxious # 45 L3m3 Low 20.4 anxious # 50 L5m2 Low 18.5 anxious # 51 L5m3 Low 113.0 less anxious females <- as.character(beh_fem_groups$ID) fem.fecalw7.dc.16s <- prune_samples(fecalw7.dc.16s@sam_data$mouseID %in% females, fecalw7.dc.16s) fem.fecalw7.dc.16s min(sample_sums(fem.fecalw7.dc.16s)) # 17606 min(taxa_sums(fem.fecalw7.dc.16s)) # 0 # prune taxa that have zero sequence reads fem.fecalw7.dc.16s <- prune_taxa( taxa = taxa_sums(fem.fecalw7.dc.16s) > 0, x = fem.fecalw7.dc.16s ) fem.fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 562 taxa and 17 samples ] # sample_data() Sample Data: [ 17 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 562 taxa by 7 taxonomic ranks ] table( fem.fecalw7.dc.16s@sam_data$Treatment ) # Control High Low # 7 5 5 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.fem.fecalw7.dc.16s <- rarefy_even_depth(fem.fecalw7.dc.16s, sample.size = min(sample_sums(fem.fecalw7.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.fem.fecalw7.dc.16s) # all 17606 ntaxa(rare.fem.fecalw7.dc.16s) # 538 nsamples(rare.fem.fecalw7.dc.16s) # 17 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.fem.fecalw7.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.113773 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))' str(ord) rare.fem.fecalw7.dc.16s@sam_data$samp_type # all "fecal" rare.fem.fecalw7.dc.16s@sam_data$Time # all "Week 7" ## join to group: anxious vs less anxious dat <- rare.fem.fecalw7.dc.16s@sam_data dat <- data.frame(dat) class(dat) head(dat) # join data dat <- merge(x = dat, y = beh_fem_groups[ ,c("ID","group")], by.x= "mouseID", by.y= "ID", all.x = TRUE ) row.names(dat) <- dat$sample # re-assign joined data to phyloseq object sample_data(rare.fem.fecalw7.dc.16s) <- dat p <- plot_ordination(rare.fem.fecalw7.dc.16s, ord, type="samples", color="group", shape="Cage.Name") p str(p) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) p$data$Sex #p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious") ) p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious"), labels = c("1/3 most\nanxious", "1/3 least\nanxious") ) # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue # "anxious" = "#ffcc00", "less anxious" = "#3399ff") cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue "1/3 most\nanxious" = "#ffcc00", "1/3 least\nanxious" = "#3399ff") shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + #geom_polygon(aes(fill = Treatment), alpha = 0.3) + geom_point(aes(colour = group)) + annotate(geom="text", x= 0.43, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) # + pp ## add centroids #https://stackoverflow.com/questions/23463324/r-add-centroids-to-scatter-plot x <- p$data$NMDS1 y <- p$data$NMDS2 group <- p$data$group df <- data.frame(group, x, y) centroids <- aggregate(cbind(x,y)~group,df,mean) f <- function(z) {sd(z)/sqrt(length(z))} # function to calculate std.err se <- aggregate(cbind(se.x=x,se.y=y)~group,df,f) centroids <- merge(centroids,se, by="group") # add std.err column to centroids str(centroids) ## plot for publication ## https://stackoverflow.com/questions/47516448/how-to-get-ordispider-like-clusters-in-ggplot-with-nmds # NMDS scores scrs <- data.frame(x=p$data$NMDS1, y=p$data$NMDS2, group=p$data$group) names(centroids) # "group" "x" "y" "se.x" "se.y" # segments segs <- merge(scrs, centroids[ ,c("group", "x", "y") ], by = 'group', sort = FALSE) #pp <- ggplot(data = p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = group) ) + pp <- ggplot() + theme_bw() + # #ggtitle("a") + #geom_point(size = 1.5) + scale_colour_manual(values = cols, name = "Group") + scale_shape_manual(values = shapes, name = "Enclosure") + #scale_fill_manual(values = cols) + #geom_point(data=centroids, aes(x=x, y=y, colour=group), shape=1, size=4, stroke=0.65, inherit.aes = FALSE, show.legend = FALSE) + geom_segment(data = segs, mapping = aes(x=x.x, xend = x.y, y=y.x, yend = y.y, colour = group), size = 0.5, alpha=0.5) + # spiders geom_point(data = centroids, aes(x=x, y=y, colour=group), size = 3, alpha=0.5, inherit.aes = FALSE, show.legend = FALSE) + # centroids geom_point(data = p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = group), size = 1.5) + # points guides( shape = guide_legend(ncol = 2) ) + #annotate(geom="text", x= 0.41, y= -0.44, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=0, size=2.2 ) + annotate(geom="text", x= 0.41, y= -0.44, label = paste0("Females\nStress = ",round(ord$stress,4)), hjust=1, vjust=0, size=2.2 ) + geom_text_repel(data = p$data, aes(x=NMDS1, y=NMDS2, label = mouseID), colour = "#737373" , size=2) + # light grey: "#999999" labs(x = NULL, y = NULL) + theme( #plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), plot.margin = margin(t = 2, r = 1, b = 2, l = 1, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.55)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), #legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), legend.box.spacing = unit(0, "pt"), legend.box.margin = margin(t = 0,r = 2,b = 0,l = 2,"pt"), legend.spacing = unit(4, "pt"), #legend.justification=c(0,0), #legend.position=c(0.05, 0.05), #legend.background = element_blank(), legend.key = element_blank(), legend.key.size = unit(2, "pt") ) pp grid.text(label = "A" , x = unit(0.06, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week7-Female-top-vs-low-anxiety-1xRarefy-withSpiders-vFINAL.tiff"), width = 8.7, height = 6, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.fem.fecalw7.dc.16s <- phyloseq::distance(rare.fem.fecalw7.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.fem.fecalw7.dc.16s)) str(sampledf) ## Refer to PERMANOVA test below with both anxiety grouping and cage # # Adonis test # set.seed(123) # adonis(bray.rare.fem.fecalw7.dc.16s ~ group, data = sampledf) # # Call: # # adonis(formula = bray.rare.fem.fecalw7.dc.16s ~ group, data = sampledf) # # # # Permutation: free # # Number of permutations: 999 # # # # Terms added sequentially (first to last) # # # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # # group 1 0.25285 0.25285 2.3832 0.1371 0.044 * # # Residuals 15 1.59148 0.10610 0.8629 # # Total 16 1.84433 1.0000 # # --- # # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 beta <- betadisper(bray.rare.fem.fecalw7.dc.16s, sampledf$group) set.seed(123) permutest(beta) # Homogeneity of dispersion test # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 1 0.00439 0.0043897 0.5157 999 0.487 # Residuals 15 0.12768 0.0085120 # Adonis test set.seed(123) adonis(bray.rare.fem.fecalw7.dc.16s ~ group + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.fem.fecalw7.dc.16s ~ group + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # group 1 0.25285 0.25285 3.3758 0.13710 0.017 * # Cage.Name 7 0.99228 0.14175 1.8926 0.53802 0.018 * # Residuals 8 0.59920 0.07490 0.32489 # Total 16 1.84433 1.00000 # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 table( rare.fem.fecalw7.dc.16s@sam_data$group ) # anxious less anxious # 9 8 table( rare.fem.fecalw7.dc.16s@sam_data$Cage.Name ) # C2 C4 C5 H3 H4 H6 L3 L5 # 3 2 2 1 2 2 3 2 # #------------------------ #### Compare CECAL microbiota ordination and composition of #### Top third vs. lowest third Open Field centre time - FEMALE only #### Ordination #------------------------ cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 550 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 550 taxa by 7 taxonomic ranks ] str(of.post) sel <- which(of.post.COPY$Sex=="Female") beh_fem <- of.post.COPY[sel, ] quantile(beh_fem$Centre_time, probs = c(1/3, 2/3) ) # 33.33333% 66.66667% # 26.56667 51.76667 beh_fem$group <- NA sel <- which(beh_fem$Centre_time <= quantile(beh_fem$Centre_time, probs = c(1/3) )) beh_fem$group[sel] <- "anxious" sel <- which(beh_fem$Centre_time >= quantile(beh_fem$Centre_time, probs = c(2/3) )) beh_fem$group[sel] <- "less anxious" # remove middle third ok <- complete.cases(beh_fem) sel <- which(ok==TRUE) beh_fem_groups <- beh_fem[sel, ] dim(beh_fem_groups) # 18 39 beh_fem_groups[ ,c("ID","Treatment","Centre_time","group")] # ID Treatment Centre_time group # 4 C2m1 Control 10.4 anxious # 5 C2m2 Control 162.5 less anxious # 6 C2m3 Control 88.7 less anxious # 10 C4m1 Control 12.1 anxious # 11 C4m2 Control 3.3 anxious # 13 C5m1 Control 10.6 anxious # 15 C5m3 Control 25.5 anxious # 27 H3m3 High 61.4 less anxious # 28 H4m1 High 22.6 anxious # 29 H4m2 High 90.0 less anxious # 34 H6m1 High 97.6 less anxious # 36 H6m3 High 57.7 less anxious # 38 L1m2 Low 82.5 less anxious # 43 L3m1 Low 13.0 anxious # 44 L3m2 Low 126.8 less anxious # 45 L3m3 Low 20.4 anxious # 50 L5m2 Low 18.5 anxious # 51 L5m3 Low 113.0 less anxious females <- as.character(beh_fem_groups$ID) fem.cecal.dc.16s <- prune_samples(cecal.dc.16s@sam_data$mouseID %in% females, cecal.dc.16s) fem.cecal.dc.16s min(sample_sums(fem.cecal.dc.16s)) # 14632 min(taxa_sums(fem.cecal.dc.16s)) # 0 # prune taxa that have zero sequence reads fem.cecal.dc.16s <- prune_taxa( taxa = taxa_sums(fem.cecal.dc.16s) > 0, x = fem.cecal.dc.16s ) fem.cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 494 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 494 taxa by 7 taxonomic ranks ] table( fem.cecal.dc.16s@sam_data$Treatment ) # Control High Low # 7 5 6 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.fem.cecal.dc.16s <- rarefy_even_depth(fem.cecal.dc.16s, sample.size = min(sample_sums(fem.cecal.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.fem.cecal.dc.16s) # all 14632 ntaxa(rare.fem.cecal.dc.16s) # 431 nsamples(rare.fem.cecal.dc.16s) # 18 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.fem.cecal.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.1208276 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on ‘wisconsin(sqrt(veganifyOTU(physeq)))’ str(ord) rare.fem.cecal.dc.16s@sam_data$samp_type # all "cecal" rare.fem.cecal.dc.16s@sam_data$Time # all "Post-exposure" ## join to group: anxious vs less anxious dat <- rare.fem.cecal.dc.16s@sam_data dat <- data.frame(dat) class(dat) head(dat) # join data dat <- merge(x = dat, y = beh_fem_groups[ ,c("ID","group")], by.x= "mouseID", by.y= "ID", all.x = TRUE ) row.names(dat) <- dat$sample # re-assign joined data to phyloseq object sample_data(rare.fem.cecal.dc.16s) <- dat p <- plot_ordination(rare.fem.cecal.dc.16s, ord, type="samples", color="group", shape="Cage.Name") p str(p) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) p$data$Sex #p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious") ) p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious"), labels = c("1/3 most\nanxious", "1/3 least\nanxious") ) # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue # "anxious" = "#ffcc00", "less anxious" = "#3399ff") cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue "1/3 most\nanxious" = "#ffcc00", "1/3 least\nanxious" = "#3399ff") shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + #geom_polygon(aes(fill = Treatment), alpha = 0.3) + geom_point(aes(colour = group)) + annotate(geom="text", x= 0.43, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) # + pp # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.fem.cecal.dc.16s <- phyloseq::distance(rare.fem.cecal.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.fem.cecal.dc.16s)) str(sampledf) beta <- betadisper(bray.rare.fem.cecal.dc.16s, sampledf$group) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 1 0.007326 0.0073258 1.3935 999 0.257 # Residuals 16 0.084114 0.0052571 # Adonis test set.seed(123) adonis(bray.rare.fem.cecal.dc.16s ~ group + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.fem.cecal.dc.16s ~ group + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # group 1 0.09869 0.098687 1.9994 0.06973 0.064 . # Cage.Name 8 0.92171 0.115213 2.3343 0.65127 0.004 ** # Residuals 8 0.39486 0.049358 0.27900 # Total 17 1.41525 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 table( rare.fem.cecal.dc.16s@sam_data$group ) # anxious less anxious # 9 9 table( rare.fem.cecal.dc.16s@sam_data$Cage.Name ) # C2 C4 C5 H3 H4 H6 L1 L3 L5 # 3 2 2 1 2 2 1 3 2 # #------------------------ #### Compare FECAL microbiota ordination and composition of #### Top third vs. lowest third Open Field centre time - MALE only #### Ordination #------------------------ fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 645 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 645 taxa by 7 taxonomic ranks ] str(of.post) sel <- which(of.post.COPY$Sex=="Male") beh_male <- of.post.COPY[sel, ] quantile(beh_male$Centre_time, probs = c(1/3, 2/3) ) # 33.33333% 66.66667% # 25.70000 69.06667 beh_male$group <- NA sel <- which(beh_male$Centre_time <= quantile(beh_male$Centre_time, probs = c(1/3) )) beh_male$group[sel] <- "anxious" sel <- which(beh_male$Centre_time >= quantile(beh_male$Centre_time, probs = c(2/3) )) beh_male$group[sel] <- "less anxious" # remove middle third ok <- complete.cases(beh_male) sel <- which(ok==TRUE) beh_male_groups <- beh_male[sel, ] dim(beh_male_groups) # 19 39 beh_male_groups[ ,c("ID","Treatment","Centre_time","group")] # ID Treatment Centre_time group # 1 C1m1 Control 13.2 anxious # 2 C1m2 Control 128.8 less anxious # 3 C1m3 Control 71.6 less anxious # 8 C3m2 Control 208.3 less anxious # 16 C6m1 Control 86.8 less anxious # 18 C6m3 Control 21.8 anxious # 19 H1m1 High 21.2 anxious # 20 H1m2 High 25.7 anxious # 22 H2m1 High 13.8 anxious # 23 H2m2 High 289.9 less anxious # 24 H2m3 High 70.8 less anxious # 32 H5m2 High 89.3 less anxious # 33 H5m3 High 279.0 less anxious # 40 L2m1 Low 13.2 anxious # 41 L2m2 Low 9.0 anxious # 42 L2m3 Low 92.0 less anxious # 46 L4m1 Low 11.4 anxious # 47 L4m2 Low 23.2 anxious # 53 L6m2 Low 25.7 anxious males <- as.character(beh_male_groups$ID) male.fecalw7.dc.16s <- prune_samples(fecalw7.dc.16s@sam_data$mouseID %in% males, fecalw7.dc.16s) male.fecalw7.dc.16s min(sample_sums(male.fecalw7.dc.16s)) # 19385 min(taxa_sums(male.fecalw7.dc.16s)) # 0 # prune taxa that have zero sequence reads male.fecalw7.dc.16s <- prune_taxa( taxa = taxa_sums(male.fecalw7.dc.16s) > 0, x = male.fecalw7.dc.16s ) male.fecalw7.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 580 taxa and 19 samples ] # sample_data() Sample Data: [ 19 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 580 taxa by 7 taxonomic ranks ] table( male.fecalw7.dc.16s@sam_data$Treatment ) # Control High Low # 6 7 6 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.male.fecalw7.dc.16s <- rarefy_even_depth(male.fecalw7.dc.16s, sample.size = min(sample_sums(male.fecalw7.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.male.fecalw7.dc.16s) # all 19385 ntaxa(rare.male.fecalw7.dc.16s) # 535 nsamples(rare.male.fecalw7.dc.16s) # 19 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.male.fecalw7.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 0.1110704 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on ‘wisconsin(sqrt(veganifyOTU(physeq)))’ str(ord) rare.male.fecalw7.dc.16s@sam_data$samp_type # all "fecal" rare.male.fecalw7.dc.16s@sam_data$Time # all "Week 7" ## join to group: anxious vs less anxious dat <- rare.male.fecalw7.dc.16s@sam_data dat <- data.frame(dat) class(dat) head(dat) # join data dat <- merge(x = dat, y = beh_male_groups[ ,c("ID","group")], by.x= "mouseID", by.y= "ID", all.x = TRUE ) row.names(dat) <- dat$sample # re-assign joined data to phyloseq object sample_data(rare.male.fecalw7.dc.16s) <- dat p <- plot_ordination(rare.male.fecalw7.dc.16s, ord, type="samples", color="group", shape="Cage.Name") p str(p) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) p$data$Sex #p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious") ) p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious"), labels = c("1/3 most\nanxious", "1/3 least\nanxious") ) # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue # "anxious" = "#ffcc00", "less anxious" = "#3399ff") cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue "1/3 most\nanxious" = "#ffcc00", "1/3 least\nanxious" = "#3399ff") shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + #geom_polygon(aes(fill = Treatment), alpha = 0.3) + geom_point(aes(colour = group)) + annotate(geom="text", x= 0.43, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) # + pp ## add centroids #https://stackoverflow.com/questions/23463324/r-add-centroids-to-scatter-plot x <- p$data$NMDS1 y <- p$data$NMDS2 group <- p$data$group df <- data.frame(group, x, y) centroids <- aggregate(cbind(x,y)~group,df,mean) f <- function(z) {sd(z)/sqrt(length(z))} # function to calculate std.err se <- aggregate(cbind(se.x=x,se.y=y)~group,df,f) centroids <- merge(centroids,se, by="group") # add std.err column to centroids str(centroids) ## plot for publication ## https://stackoverflow.com/questions/47516448/how-to-get-ordispider-like-clusters-in-ggplot-with-nmds # NMDS scores scrs <- data.frame(x=p$data$NMDS1, y=p$data$NMDS2, group=p$data$group) names(centroids) # "group" "x" "y" "se.x" "se.y" # segments segs <- merge(scrs, centroids[ ,c("group", "x", "y") ], by = 'group', sort = FALSE) #pp <- ggplot(data = p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = group) ) + pp <- ggplot() + theme_bw() + # #ggtitle("a") + #geom_point(size = 1.5) + scale_colour_manual(values = cols, name = "Group") + scale_shape_manual(values = shapes, name = "Enclosure") + #scale_fill_manual(values = cols) + #geom_point(data=centroids, aes(x=x, y=y, colour=group), shape=1, size=4, stroke=0.65, inherit.aes = FALSE, show.legend = FALSE) + geom_segment(data = segs, mapping = aes(x=x.x, xend = x.y, y=y.x, yend = y.y, colour = group), size = 0.5, alpha=0.5) + # spiders geom_point(data = centroids, aes(x=x, y=y, colour=group), size = 3, alpha=0.5, inherit.aes = FALSE, show.legend = FALSE) + # centroids geom_point(data = p$data, aes(x=NMDS1, y=NMDS2, shape = Cage.Name, colour = group), size = 1.5) + # points guides( shape = guide_legend(ncol = 2) ) + #annotate(geom="text", x= 0.41, y= -0.44, label = paste0("Stress = ",round(ord$stress,4)), hjust=1, vjust=0, size=2.2 ) + annotate(geom="text", x= 0.405, y= 0.195, label = paste0("Males\nStress = ",round(ord$stress,4)), hjust=1, vjust=0, size=2.2 ) + geom_text_repel(data = p$data, aes(x=NMDS1, y=NMDS2, label = mouseID), colour = "#737373" , size=2) + # light grey: "#999999" labs(x = NULL, y = NULL) + theme( #plot.margin = margin(t = 2, r = 5, b = 2, l = 2, "pt"), plot.margin = margin(t = 2, r = 1, b = 2, l = 1, "pt"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), legend.text = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.55)), legend.title = element_text(margin=margin(t = 0,r = 0,b = 0,l = 0,"pt"), size = rel(0.6)), #legend.margin = margin(t = 0,r = 0,b = 0,l = 0,"pt"), legend.margin = margin(t = 0,r = 0,b = 0,l = 2,"pt"), legend.box.spacing = unit(0, "pt"), legend.box.margin = margin(t = 0,r = 2,b = 0,l = 2,"pt"), legend.spacing = unit(4, "pt"), #legend.justification=c(0,0), #legend.position=c(0.05, 0.05), #legend.background = element_blank(), legend.key = element_blank(), legend.key.size = unit(2, "pt") ) pp grid.text(label = "B" , x = unit(0.06, "npc") , y = unit(0.93,"npc"), gp=gpar(fontsize=11, fontface="bold") ) dev.print(tiff, file = paste0("plots/","Ordination-Fecal-Week7-MALE-top-vs-low-anxiety-1xRarefy-withSpiders-vFINAL.tiff"), width = 8.7, height = 6, units = "cm", res=600, compression="lzw") # Test hypothesis that microbiota vary (with different centroids) by groups # Calculate bray curtis distance matrix set.seed(123) bray.rare.male.fecalw7.dc.16s <- phyloseq::distance(rare.male.fecalw7.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.male.fecalw7.dc.16s)) str(sampledf) # # Refer to PERMANOVA test below with both anxiety grouping and cage # # Adonis test # set.seed(123) # adonis(bray.rare.male.fecalw7.dc.16s ~ group, data = sampledf) # # Call: # # adonis(formula = bray.rare.male.fecalw7.dc.16s ~ group, data = sampledf) # # # # Permutation: free # # Number of permutations: 999 # # # # Terms added sequentially (first to last) # # # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # # group 1 0.02614 0.026140 0.30869 0.01783 0.962 # # Residuals 17 1.43958 0.084681 0.98217 # # Total 18 1.46572 1.00000 beta <- betadisper(bray.rare.male.fecalw7.dc.16s, sampledf$group) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 1 0.001607 0.0016068 0.2727 999 0.645 # Residuals 17 0.100173 0.0058925 # Adonis test set.seed(123) adonis(bray.rare.male.fecalw7.dc.16s ~ group + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.male.fecalw7.dc.16s ~ group + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # group 1 0.02614 0.026140 0.54212 0.01783 0.772 # Cage.Name 8 1.00562 0.125703 2.60696 0.68609 0.002 ** # Residuals 9 0.43396 0.048218 0.29607 # Total 18 1.46572 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 table( rare.male.fecalw7.dc.16s@sam_data$group ) # anxious less anxious # 10 9 table( rare.male.fecalw7.dc.16s@sam_data$Cage.Name ) # C1 C3 C6 H1 H2 H5 L2 L4 L6 # 3 1 2 2 3 2 3 2 1 # #------------------------ #### Compare CECAL microbiota ordination and composition of #### Top third vs. lowest third Open Field centre time - MALE only #### Ordination #------------------------ cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 550 taxa and 53 samples ] # sample_data() Sample Data: [ 53 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 550 taxa by 7 taxonomic ranks ] str(of.post) sel <- which(of.post.COPY$Sex=="Male") beh_male <- of.post.COPY[sel, ] quantile(beh_male$Centre_time, probs = c(1/3, 2/3) ) # 33.33333% 66.66667% # 25.70000 69.06667 beh_male$group <- NA sel <- which(beh_male$Centre_time <= quantile(beh_male$Centre_time, probs = c(1/3) )) beh_male$group[sel] <- "anxious" sel <- which(beh_male$Centre_time >= quantile(beh_male$Centre_time, probs = c(2/3) )) beh_male$group[sel] <- "less anxious" # remove middle third ok <- complete.cases(beh_male) sel <- which(ok==TRUE) beh_male_groups <- beh_male[sel, ] dim(beh_male_groups) # 19 39 beh_male_groups[ ,c("ID","Treatment","Centre_time","group")] # ID Treatment Centre_time group # 1 C1m1 Control 13.2 anxious # 2 C1m2 Control 128.8 less anxious # 3 C1m3 Control 71.6 less anxious # 8 C3m2 Control 208.3 less anxious # 16 C6m1 Control 86.8 less anxious # 18 C6m3 Control 21.8 anxious # 19 H1m1 High 21.2 anxious # 20 H1m2 High 25.7 anxious # 22 H2m1 High 13.8 anxious # 23 H2m2 High 289.9 less anxious # 24 H2m3 High 70.8 less anxious # 32 H5m2 High 89.3 less anxious # 33 H5m3 High 279.0 less anxious # 40 L2m1 Low 13.2 anxious # 41 L2m2 Low 9.0 anxious # 42 L2m3 Low 92.0 less anxious # this sample L2m3Ce failed sequencing # 46 L4m1 Low 11.4 anxious # 47 L4m2 Low 23.2 anxious # 53 L6m2 Low 25.7 anxious males <- as.character(beh_male_groups$ID) male.cecal.dc.16s <- prune_samples(cecal.dc.16s@sam_data$mouseID %in% males, cecal.dc.16s) male.cecal.dc.16s min(sample_sums(male.cecal.dc.16s)) # 37094 min(taxa_sums(male.cecal.dc.16s)) # 0 # prune taxa that have zero sequence reads male.cecal.dc.16s <- prune_taxa( taxa = taxa_sums(male.cecal.dc.16s) > 0, x = male.cecal.dc.16s ) male.cecal.dc.16s # phyloseq-class experiment-level object # otu_table() OTU Table: [ 502 taxa and 18 samples ] # sample_data() Sample Data: [ 18 samples by 20 sample variables ] # tax_table() Taxonomy Table: [ 502 taxa by 7 taxonomic ranks ] table( male.cecal.dc.16s@sam_data$Treatment ) # Control High Low # 6 7 5 ## Use lowest number of reads to create rarefied dataset seed <- 123 rare.male.cecal.dc.16s <- rarefy_even_depth(male.cecal.dc.16s, sample.size = min(sample_sums(male.cecal.dc.16s)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) sample_sums(rare.male.cecal.dc.16s) # all 37094 ntaxa(rare.male.cecal.dc.16s) # 475 nsamples(rare.male.cecal.dc.16s) # 18 ### ORDINATION PLOT [finished-plot] # # # # # # # # # # # # # # # ### NMDS + Bray-Curtis set.seed(123) ord <- ordinate(rare.male.cecal.dc.16s, "NMDS", "bray") ord # Call: # metaMDS(comm = veganifyOTU(physeq), distance = distance) # # global Multidimensional Scaling using monoMDS # # Data: wisconsin(sqrt(veganifyOTU(physeq))) # Distance: bray # # Dimensions: 2 # Stress: 6.338062e-05 # Stress type 1, weak ties # Two convergent solutions found after 20 tries # Scaling: centring, PC rotation, halfchange scaling # Species: expanded scores based on ‘wisconsin(sqrt(veganifyOTU(physeq)))’ # Warning message: # In metaMDS(veganifyOTU(physeq), distance, ...) : # stress is (nearly) zero: you may have insufficient data str(ord) rare.male.cecal.dc.16s@sam_data$samp_type # all "cecal" rare.male.cecal.dc.16s@sam_data$Time # all "Post-exposure" ## join to group: anxious vs less anxious dat <- rare.male.cecal.dc.16s@sam_data dat <- data.frame(dat) class(dat) head(dat) # join data dat <- merge(x = dat, y = beh_male_groups[ ,c("ID","group")], by.x= "mouseID", by.y= "ID", all.x = TRUE ) row.names(dat) <- dat$sample # re-assign joined data to phyloseq object sample_data(rare.male.cecal.dc.16s) <- dat p <- plot_ordination(rare.male.cecal.dc.16s, ord, type="samples", color="group", shape="Cage.Name") p str(p) p$data$Cage.Name <- factor(p$data$Cage.Name, levels = c( paste0("C",1:6),paste0("L",1:6),paste0("H",1:6) ), ordered = TRUE) p$data$Sex #p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious") ) p$data$group <- factor(p$data$group, levels = c("anxious" , "less anxious"), labels = c("1/3 most\nanxious", "1/3 least\nanxious") ) # cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue # "anxious" = "#ffcc00", "less anxious" = "#3399ff") cols <- c("Control" = "#f46d43", "Low" = "#66c2a5" , "High" = "#5e4fa2" , # orange, aqua, dark-blue "1/3 most\nanxious" = "#ffcc00", "1/3 least\nanxious" = "#3399ff") shapes <- c(1:18) pp <- p + theme_bw() + scale_colour_manual(values = cols) + scale_shape_manual(values = shapes, name = "Enclosure") + scale_fill_manual(values = cols) + #geom_polygon(aes(fill = Treatment), alpha = 0.3) + geom_point(aes(colour = group)) + annotate(geom="text", x= 0.43, y= -0.3, label = paste0("Stress = ",round(ord$stress,5)), hjust=1, vjust=0, size=3.25 ) + guides( color = guide_legend(order = 0), shape = guide_legend(order = 1) ) + guides( shape = guide_legend(ncol = 2) ) # + pp # Test hypothesis that microbiota vary (with different centroids) by Treatment # Calculate bray curtis distance matrix set.seed(123) bray.rare.male.cecal.dc.16s <- phyloseq::distance(rare.male.cecal.dc.16s, method = "bray") sampledf <- data.frame(sample_data(rare.male.cecal.dc.16s)) str(sampledf) beta <- betadisper(bray.rare.male.cecal.dc.16s, sampledf$group) set.seed(123) permutest(beta) # Permutation test for homogeneity of multivariate dispersions # Permutation: free # Number of permutations: 999 # # Response: Distances # Df Sum Sq Mean Sq F N.Perm Pr(>F) # Groups 1 0.000289 0.0002891 0.0431 999 0.84 # Residuals 16 0.107197 0.0066998 # Adonis test set.seed(123) adonis(bray.rare.male.cecal.dc.16s ~ group + Cage.Name, data = sampledf) # Call: # adonis(formula = bray.rare.male.cecal.dc.16s ~ group + Cage.Name, data = sampledf) # # Permutation: free # Number of permutations: 999 # # Terms added sequentially (first to last) # # Df SumsOfSqs MeanSqs F.Model R2 Pr(>F) # group 1 0.10414 0.104138 2.5673 0.08109 0.067 . # Cage.Name 8 0.85553 0.106942 2.6364 0.66621 0.001 *** # Residuals 8 0.32451 0.040563 0.25270 # Total 17 1.28418 1.00000 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 table( rare.male.cecal.dc.16s@sam_data$group ) # anxious less anxious # 10 8 table( rare.male.cecal.dc.16s@sam_data$Cage.Name ) # C1 C3 C6 H1 H2 H5 L2 L4 L6 # 3 1 2 2 3 2 2 2 1 # #------------------------ #### END