#!/usr/bin/env bash
# Selway C.A. - October 2018
## Pre-processing for Urban International Soils
### Mills et al. (2021) Rare genera define urban green space soil bacterial communities in three cities across the world
#### Samples have been uploaded to QIITA (Playford, AUS: StudyID 13064; Bournemouth, UK and Haikou, CHN: Study ID 13559)

# bcl files collected from Illumina MiSeq (v2) were converted to FASTQ sequences using bcl2fastq v1.8.4
#
# Usage: run from the directory that holds the per-run
# <run>_emp-single-end-sequences/ folders, the per-run mapping files, the
# combined mapping file and the SILVA classifier referenced below.
# Every input and output path in this script is relative to the current
# working directory.

############################# CALLING QIIME2 #############################

# Strict mode is switched on only AFTER activation so that the environment's
# own activation scripts are not executed under -e/-u; activation failure is
# checked explicitly instead.
# shellcheck disable=SC1091
source activate qiime2-2018.8 || {
  printf 'error: could not activate conda environment qiime2-2018.8\n' >&2
  exit 1
}
set -euo pipefail

# Sequencing runs handled by every per-run stage, in processing order.
readonly RUNS=(
  hgs161222
  hgs170307
  hgs170410
  hgs170424
  hgs170705
  hgs170706
)

# Combined sample metadata used for filtering and summarising merged tables.
readonly METADATA_FILE=Playford_UK_India_mapping_file.txt

# After investigating the quality of the sequencing runs, we chose a length of 150bp
readonly TRIM_LENGTH=150

################### IMPORTING OF SEQUENCES INTO QIIME2 ###################

for run in "${RUNS[@]}"; do
  qiime tools import \
    --type EMPSingleEndSequences \
    --input-path "${run}_emp-single-end-sequences" \
    --output-path "${run}_emp-single-end-sequences.qza"
done

##################### DEMULTIPLEXING SEQUENCING RUNS #####################

#######################################
# Demultiplex one imported sequencing run with `qiime demux emp-single`.
# Arguments:
#   $1   - run prefix (e.g. hgs161222); reads ${1}_emp-single-end-sequences.qza
#   $2   - mapping file holding the BarcodeSequence column for that run
#   $3.. - optional extra flags passed through unchanged
#          (e.g. --p-rev-comp-mapping-barcodes)
# Outputs:
#   writes ${1}_demux.qza
# Returns:
#   exit status of the qiime command
#######################################
demux_run() {
  local run=$1
  local mapping=$2
  shift 2
  qiime demux emp-single \
    --i-seqs "${run}_emp-single-end-sequences.qza" \
    --m-barcodes-file "${mapping}" \
    --m-barcodes-column BarcodeSequence \
    "$@" \
    --o-per-sample-sequences "${run}_demux.qza"
}

# The first run is demultiplexed WITHOUT --p-rev-comp-mapping-barcodes;
# all later runs pass it.
demux_run hgs161222 \
  161221_IMS_CSeJMiLWe_16S_PlayfordRep1HumanGreenSpaces_mappingfile.txt
demux_run hgs170307 \
  170307_IMS_CSeESk_16S_PlayfordRep2and3HumanGreenSpaces_mappingfile.txt \
  --p-rev-comp-mapping-barcodes
demux_run hgs170410 \
  170410_IMS_CSe_16S_UKHumanGreenSpaces_mappingfile.txt \
  --p-rev-comp-mapping-barcodes
demux_run hgs170424 \
  170424_IMS_ESk_16S_IndiaHGS_TimeStorage_mappingfile.txt \
  --p-rev-comp-mapping-barcodes
demux_run hgs170705 \
  170705_IMS_JYoJMi_16S_WTGreenSpaces1_mappingfile.txt \
  --p-rev-comp-mapping-barcodes
demux_run hgs170706 \
  170706_IMS_JYoJMi_16S_WTGreenSpaces2_mappingfile.txt \
  --p-rev-comp-mapping-barcodes

####################### SUMMARISING DEMUX RESULTS ########################

for run in "${RUNS[@]}"; do
  qiime demux summarize \
    --i-data "${run}_demux.qza" \
    --o-visualization "${run}_demux.qzv"
done

########################### QUALITY FILTERING ############################

for run in "${RUNS[@]}"; do
  qiime quality-filter q-score \
    --i-demux "${run}_demux.qza" \
    --o-filtered-sequences "${run}_demux-filtered.qza" \
    --o-filter-stats "${run}_demux-filter-stats.qza"
done

################### ASSIGNING SEQUENCES THROUGH DEBLUR ###################

for run in "${RUNS[@]}"; do
  qiime deblur denoise-16S \
    --i-demultiplexed-seqs "${run}_demux-filtered.qza" \
    --p-trim-length "${TRIM_LENGTH}" \
    --o-representative-sequences "${run}_rep-seqs-deblur.qza" \
    --o-table "${run}_table-deblur.qza" \
    --p-sample-stats \
    --o-stats "${run}_deblur-stats.qza"
done

####################### SUMMARISING DEBLUR RESULTS #######################

for run in "${RUNS[@]}"; do
  qiime metadata tabulate \
    --m-input-file "${run}_demux-filter-stats.qza" \
    --o-visualization "${run}_demux-filter-stats.qzv"
  qiime deblur visualize-stats \
    --i-deblur-stats "${run}_deblur-stats.qza" \
    --o-visualization "${run}_deblur-stats.qzv"
done

###### MERGING FEATURE TABLES AND SEQUENCES FROM ALL SEQUENCING RUNS #####

# Build one --i-tables / --i-data pair per run so the merge commands always
# cover exactly the runs listed in RUNS.
merge_table_args=()
merge_seq_args=()
for run in "${RUNS[@]}"; do
  merge_table_args+=(--i-tables "${run}_table-deblur.qza")
  merge_seq_args+=(--i-data "${run}_rep-seqs-deblur.qza")
done

qiime feature-table merge \
  "${merge_table_args[@]}" \
  --o-merged-table human_green_space_merged_table.qza

qiime feature-table merge-seqs \
  "${merge_seq_args[@]}" \
  --o-merged-data human_green_space_merged_seqs.qza

################### REMOVE SAMPLES FROM OTHER PROJECTS ###################

# NOTE: the output path equals the input path, so the merged table is
# replaced by its filtered version and every later step reads the filtered
# table.
qiime feature-table filter-samples \
  --i-table human_green_space_merged_table.qza \
  --m-metadata-file "${METADATA_FILE}" \
  --p-where "Project='HumanGreenSpaces'" \
  --o-filtered-table human_green_space_merged_table.qza

######### SUMMARISING FEATURE TABLE AND REPRESENTATIVE SEQUENCES #########

qiime feature-table summarize \
  --i-table human_green_space_merged_table.qza \
  --o-visualization human_green_space_merged_table.qzv \
  --m-sample-metadata-file "${METADATA_FILE}"

qiime feature-table tabulate-seqs \
  --i-data human_green_space_merged_seqs.qza \
  --o-visualization human_green_space_merged_seqs.qzv

####################### CREATING PHYLOGENETIC TREE ########################

qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences human_green_space_merged_seqs.qza \
  --o-alignment aligned-rep-seqs.qza \
  --o-masked-alignment masked-aligned-rep-seqs.qza \
  --o-tree unrooted-tree.qza \
  --o-rooted-tree rooted-tree.qza

################ SELECTING SPECIFIC SAMPLES FOR THIS STUDY ################

## Just want to filter the environmental samples and their controls
qiime feature-table filter-samples \
  --i-table human_green_space_merged_table.qza \
  --m-metadata-file "${METADATA_FILE}" \
  --p-where "InternationalSoils='Y'" \
  --o-filtered-table green_space_merged_table_final.qza

qiime feature-table summarize \
  --i-table green_space_merged_table_final.qza \
  --o-visualization green_space_merged_table_final.qzv

######################### CLASSIFYING SEQUENCES ##########################

# Keep only the representative sequences whose features are present in the
# final table before classifying them.
qiime feature-table filter-seqs \
  --i-data human_green_space_merged_seqs.qza \
  --i-table green_space_merged_table_final.qza \
  --o-filtered-data green_space_merged_seqs.qza

qiime feature-table tabulate-seqs \
  --i-data green_space_merged_seqs.qza \
  --o-visualization green_space_merged_seqs.qzv

qiime feature-classifier classify-sklearn \
  --i-classifier silva-132-99-515-806-nb-classifier.qza \
  --i-reads green_space_merged_seqs.qza \
  --o-classification silva_environmental_green_space_taxonomy.qza

qiime metadata tabulate \
  --m-input-file silva_environmental_green_space_taxonomy.qza \
  --o-visualization silva_environmental_green_space_taxonomy.qzv

# export taxonomy for downstream analyses
qiime tools export \
  --input-path silva_environmental_green_space_taxonomy.qza \
  --output-path silva_taxonomy_environmental

############################ EXPORTING SAMPLES ############################

# exporting biom files for downstream analysis
qiime tools export \
  --input-path green_space_merged_table_final.qza \
  --output-path Environmental_biom_table

biom convert \
  -i Environmental_biom_table/feature-table.biom \
  -o environmental_green_space_biom_table.tsv \
  --to-tsv