--- title: "Sig-genes-in-pathway" author: "Kelly Yan Ren" date: "`r format(Sys.time(), '%d %B, %Y')`" output: bookdown::html_document2: default link-citations: yes fig_caption: yes toc: yes --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ``` # Loading packages ```{r packages} library(magrittr) library(ggVennDiagram) library(readxl) '%!in%' <- function(x,y)!('%in%'(x,y)) ``` # Major analysis ## Read the GO results and genes ```{r} HB_polledvshorned_GO <- read.csv("polledvshornedForhornbud-ARS_jo-GOresults_p0.05_lfc1.csv") HB_polledvshorned_genes <- read.csv("polledvshornedForhornbud-ARS_jo-DEG_p0.05_lfc1.csv") ``` ```{r} FS_polledvshorned_GO <- read.csv("polledvshornedForfrontal_skin-ARS_jo-GOresults_p0.05_lfc1.csv") FS_polledvshorned_genes <- read.csv("polledvshornedForfrontal_skin-ARS_jo-DEG_p0.05_lfc1.csv") ``` ```{r} Cattle = useMart(biomart="ensembl", dataset="btaurus_gene_ensembl") ### GO terms match their entrezgene_id and ensembl_gene_id cattle_genes_GO <- getBM( attributes= c("chromosome_name","start_position","end_position","strand","ensembl_gene_id","external_gene_name", "description","go_id","name_1006", "definition_1006"),mart= Cattle) head(cattle_genes_GO) ``` ### HB ```{r} # subset for all genes in GO HB_polledvshorned_GO_cattle_genes <- subset(cattle_genes_GO,go_id %in% HB_polledvshorned_GO$goid) # marked the enriched genes HB_polledvshorned_GO_cattle_genes%<>%left_join(HB_polledvshorned_genes[,c("gene_id","logFC")], by = c("ensembl_gene_id" = "gene_id")) HB_polledvshorned_GO_cattle_genes$in_DE <- HB_polledvshorned_GO_cattle_genes$ensembl_gene_id %in% HB_polledvshorned_genes$gene_id # marked the up and down regulated genes HB_polledvshorned_GO_cattle_genes$up_regulation <- HB_polledvshorned_GO_cattle_genes$logFC > 0 ``` ### FS ```{r} # subset for all genes in GO FS_polledvshorned_GO_cattle_genes <- subset(cattle_genes_GO,go_id %in% FS_polledvshorned_GO$goid) # marked the enriched genes FS_polledvshorned_GO_cattle_genes%<>%left_join(FS_polledvshorned_genes[,c("gene_id","logFC")], by = c("ensembl_gene_id" = "gene_id")) FS_polledvshorned_GO_cattle_genes$in_DE <- FS_polledvshorned_GO_cattle_genes$ensembl_gene_id %in% FS_polledvshorned_genes$gene_id # marked the up and down regulated genes FS_polledvshorned_GO_cattle_genes$up_regulation <- FS_polledvshorned_GO_cattle_genes$logFC > 0 ``` ```{r} FS_polledvshorned_GO_cattle_genes$in_HB <- FS_polledvshorned_GO_cattle_genes$ensembl_gene_id %in% HB_polledvshorned_GO_cattle_genes$ensembl_gene_id HB_polledvshorned_GO_cattle_genes$in_FS <- HB_polledvshorned_GO_cattle_genes$ensembl_gene_id %in% FS_polledvshorned_GO_cattle_genes$ensembl_gene_id write_csv(FS_polledvshorned_GO_cattle_genes,"polledvshornedForfrontal_skin-ARS_jo-enriched_genes_in_GO_terms.csv") write_csv(HB_polledvshorned_GO_cattle_genes,"polledvshornedForhornbud-ARS_jo-enriched_genes_in_GO_terms.csv") ``` ### VennDiagram for common terms ```{r} common_GO_terms <- unique(FS_polledvshorned_GO_cattle_genes$go_id)[!unique(FS_polledvshorned_GO_cattle_genes$go_id)[match(unique(FS_polledvshorned_GO_cattle_genes$go_id),unique(HB_polledvshorned_GO_cattle_genes$go_id))]%>%is.na()] total <- list() plot <- list() for (a in 1:8){ VennDiagram_list <- list() VennDiagram_list$HB <- subset(HB_polledvshorned_GO_cattle_genes,go_id %in% common_GO_terms[a])%>% subset(in_DE %in% "TRUE")%>% extract2("ensembl_gene_id") VennDiagram_list$FS <- subset(FS_polledvshorned_GO_cattle_genes,go_id %in% common_GO_terms[a])%>% subset(in_DE %in% "TRUE")%>% extract2("ensembl_gene_id") total[[a]] <- (subset(HB_polledvshorned_GO_cattle_genes,go_id %in% common_GO_terms[a])%>%dim())[1] ggVennDiagram(VennDiagram_list,main = "d") ggsave(filename = paste0(paste0(paste0(gsub(":","",common_GO_terms[a]),"_siggenes_HBvsFS","_total"),"",total[[a]]),"",".pdf"),device = "pdf")} ``` # indiidually check some GO terms ### common term GO0003008 ```{r} # Because changing the version of GO database some terms don't have direct genes # No direct genes in this GO term for updated database GO_term_GO0003008 <- read_xlsx("manually_dlownload_GO/GO0003008.xlsx") GO_term_GO0003008%>%subset(Direct_GO_ID %in% "GO:0003008") GO_term_GO0003008_Protein_name <- unique(GO_term_GO0003008$Protein_name) ggVennDiagram(list(HB=GO_term_GO0003008_Protein_name[GO_term_GO0003008_Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% HB_polledvshorned_genes$gene_id)$external_gene_name], FS=GO_term_GO0003008_Protein_name[GO_term_GO0003008_Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% FS_polledvshorned_genes$gene_id)$external_gene_name])) ggsave(filename = paste0(paste0(paste0(gsub(":","","GO0003008"),"_siggenes_HBvsFS","_total"),"",length(GO_term_GO0003008_Protein_name)),"",".pdf"),device = "pdf") GO_term_GO0003008$in_HB <- GO_term_GO0003008$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% HB_polledvshorned_genes$gene_id)$external_gene_name GO_term_GO0003008$in_FS <- GO_term_GO0003008$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% FS_polledvshorned_genes$gene_id)$external_gene_name write_csv(GO_term_GO0003008,"manually_dlownload_GO/polledvshornedForhornbud-ARS_jo-enriched_genes_in_GO0003008_terms.csv") ``` ### HB term GO0051704 ```{r} # No direct genes in this GO term for updated database GO_term_GO0051704 <- read_xlsx("manually_dlownload_GO/GO0051704.xlsx") GO_term_GO0051704%>%subset(Direct_GO_ID %in% "GO:0051704") GO_term_GO0051704$in_DE <- GO_term_GO0051704$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% HB_polledvshorned_genes$gene_id)$external_gene_name GO_term_GO0051704$in_FS <- GO_term_GO0051704$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% FS_polledvshorned_genes$gene_id)$external_gene_name write_csv(GO_term_GO0051704,"manually_dlownload_GO/polledvshornedForhornbud-ARS_jo-enriched_genes_in_GO0051704_terms.csv") ``` ### HB term GO0061783 ```{r} # No direct genes in this GO term for updated database GO_term_GO0061783 <- read_xlsx("manually_dlownload_GO/GO0061783.xlsx") GO_term_GO0061783%>%subset(Direct_GO_ID %in% "GO:0061783") GO_term_GO0061783$in_DE <- GO_term_GO0061783$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% HB_polledvshorned_genes$gene_id)$external_gene_name GO_term_GO0061783$in_FS <- GO_term_GO0061783$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% FS_polledvshorned_genes$gene_id)$external_gene_name write_csv(GO_term_GO0061783,"manually_dlownload_GO/polledvshornedForhornbud-ARS_jo-enriched_genes_in_GO0061783_terms.csv") ``` ### FS term GO0060089 ```{r} # No direct genes in this GO term for updated database GO_term_GO0060089 <- read_xlsx("manually_dlownload_GO/GO0060089.xlsx") GO_term_GO0060089%>%subset(Direct_GO_ID %in% "GO:0060089") GO_term_GO0060089$in_DE <- GO_term_GO0060089$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% FS_polledvshorned_genes$gene_id)$external_gene_name GO_term_GO0060089$in_HB <- GO_term_GO0060089$Protein_name %in% subset(cattle_genes_GO,ensembl_gene_id %in% HB_polledvshorned_genes$gene_id)$external_gene_name write_csv(GO_term_GO0060089,"manually_dlownload_GO/polledvshornedForhornbud-ARS_jo-enriched_genes_in_GO0060089_terms.csv") ``` # Appendix ```{r} sessionInfo() ```