Data import
phenotype
polledvshornedFS_DEG <- read_csv("polledvshornedForfrontal_skin-ARS_jo-DEG_p0.05_lfc1.csv")
polledvshornedFS_DEG
## # A tibble: 7,012 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 19 RBFOX3 protein_cod… 1.96 5.91 8.88 1.56e-9
## 2 2 ENSBTA… 24 CELF4 protein_cod… 2.22 5.75 8.74 2.16e-9
## 3 3 ENSBTA… 28 CDH23 protein_cod… 2.27 6.00 8.47 4.06e-9
## 4 4 ENSBTA… 8 GABBR2 protein_cod… 1.86 6.05 7.90 1.59e-8
## 5 5 ENSBTA… 10 not avai… protein_cod… -1.88 5.32 -7.86 1.76e-8
## 6 6 ENSBTA… 4 PTPRN2 protein_cod… 2.26 6.14 7.77 2.22e-8
## 7 7 ENSBTA… 10 not avai… lncRNA 2.23 5.75 7.74 2.36e-8
## 8 8 ENSBTA… 6 not avai… protein_cod… -1.35 3.93 -7.69 2.67e-8
## 9 9 ENSBTA… 7 NRG2 protein_cod… 2.23 5.19 7.68 2.72e-8
## 10 10 ENSBTA… 10 DPF3 protein_cod… 1.80 5.63 7.67 2.85e-8
## # … with 7,002 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
polledvshornedHB_DEG <- read_csv("polledvshornedForhornbud-ARS_jo-DEG_p0.05_lfc1.csv")
polledvshornedHB_DEG
## # A tibble: 7,586 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 KRT6B protein_cod… -4.15 2.25 -10.4 5.44e-11
## 2 2 ENSBTA… 3 MCOLN2 protein_cod… 2.01 3.19 10.3 6.56e-11
## 3 3 ENSBTA… 20 NIM1K protein_cod… 1.83 3.86 10.1 1.11e-10
## 4 4 ENSBTA… 21 HSP90AA1 protein_cod… -1.71 8.14 -9.98 1.36e-10
## 5 5 ENSBTA… 14 not avai… protein_cod… 1.95 2.55 9.95 1.43e-10
## 6 6 ENSBTA… 7 CALR protein_cod… -1.71 6.40 -9.90 1.61e-10
## 7 7 ENSBTA… 5 not avai… protein_cod… -4.49 1.98 -9.94 1.46e-10
## 8 8 ENSBTA… 5 KRT1 protein_cod… -5.75 2.36 -9.93 1.49e-10
## 9 9 ENSBTA… 1 MELTF protein_cod… 2.39 2.37 9.87 1.71e-10
## 10 10 ENSBTA… 26 JAKMIP3 protein_cod… 2.04 3.21 9.80 1.97e-10
## # … with 7,576 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
polledvshornedFB_DEG <- read_csv("polledvshornedForforebrain-ARS_jo-DEG_p0.05_lfc1.csv")
polledvshornedFB_DEG
## # A tibble: 4,594 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 KERA protein_cod… -3.24 2.60 -12.4 1.04e-12
## 2 2 ENSBTA… 5 not avai… protein_cod… -1.63 4.69 -9.94 1.46e-10
## 3 3 ENSBTA… 6 PAICS protein_cod… -1.24 5.32 -9.72 2.38e-10
## 4 4 ENSBTA… 8 not avai… protein_cod… -1.75 4.50 -9.71 2.44e-10
## 5 5 ENSBTA… X TMEM47 protein_cod… -1.72 5.60 -9.67 2.63e-10
## 6 6 ENSBTA… 6 MRFAP1L1 protein_cod… -1.66 5.35 -9.48 4.05e-10
## 7 7 ENSBTA… 28 ZNF25 protein_cod… -1.70 4.95 -9.43 4.53e-10
## 8 8 ENSBTA… 19 YWHAE protein_cod… -1.35 7.38 -9.42 4.61e-10
## 9 9 ENSBTA… X MORF4L2 protein_cod… -1.61 6.74 -9.40 4.83e-10
## 10 10 ENSBTA… 3 SERBP1 protein_cod… -1.66 7.12 -9.36 5.23e-10
## # … with 4,584 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
polledvshornedMB_DEG <- read_csv("polledvshornedFormidbrain-ARS_jo-DEG_p0.05_lfc1.csv")
polledvshornedMB_DEG
## # A tibble: 3,400 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 19 YWHAE protein_cod… -1.57 7.38 -11.9 2.66e-12
## 2 2 ENSBTA… 2 HNRNPA3 protein_cod… -1.55 6.08 -11.0 1.72e-11
## 3 3 ENSBTA… X MORF4L2 protein_cod… -1.69 6.74 -10.8 2.55e-11
## 4 4 ENSBTA… 6 PAICS protein_cod… -1.23 5.32 -10.7 2.82e-11
## 5 5 ENSBTA… 27 RWDD4 protein_cod… -1.22 3.68 -10.7 2.68e-11
## 6 6 ENSBTA… 2 SSB protein_cod… -1.69 5.16 -10.7 2.96e-11
## 7 7 ENSBTA… 28 ZNF25 protein_cod… -1.69 4.95 -10.6 3.56e-11
## 8 8 ENSBTA… 8 DNAJA1 protein_cod… -1.56 6.05 -10.5 4.02e-11
## 9 9 ENSBTA… 21 HSP90AA1 protein_cod… -1.80 8.14 -10.6 3.78e-11
## 10 10 ENSBTA… 22 not avai… protein_cod… -1.74 4.70 -10.5 4.26e-11
## # … with 3,390 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
tissues
FBvsFSForhorned_DEG <- read_csv("forebrainvsfrontal_skinForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
FBvsFSForhorned_DEG
## # A tibble: 2,690 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 8 RORB protein_cod… 2.99 6.05 11.6 4.81e-12
## 2 2 ENSBTA… 5 KERA protein_cod… 3.43 2.60 11.3 8.07e-12
## 3 3 ENSBTA… 20 C1QTNF3 protein_cod… 3.04 5.70 10.4 5.09e-11
## 4 4 ENSBTA… 24 CDH7 protein_cod… 2.08 6.19 9.90 1.60e-10
## 5 5 ENSBTA… 15 CNTN5 protein_cod… 1.81 7.63 8.89 1.52e- 9
## 6 6 ENSBTA… 20 PDZD2 protein_cod… -1.46 7.77 -8.76 2.07e- 9
## 7 7 ENSBTA… 3 LMX1A protein_cod… 1.62 5.34 8.53 3.51e- 9
## 8 8 ENSBTA… 19 TBX2 protein_cod… -2.21 2.32 -8.58 3.14e- 9
## 9 9 ENSBTA… 23 KHDRBS2 protein_cod… 1.81 7.93 8.42 4.62e- 9
## 10 10 ENSBTA… 6 SGMS2 protein_cod… -1.76 6.26 -8.22 7.43e- 9
## # … with 2,680 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
FBvsFSForpolled_DEG <- read_csv("forebrainvsfrontal_skinForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
FBvsFSForpolled_DEG
## # A tibble: 283 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 2 IGFBP5 protein_cod… -1.25 7.46 -7.62 3.20e-8
## 2 2 ENSBTA… 13 NOP56 protein_cod… -1.20 4.64 -7.57 3.65e-8
## 3 3 ENSBTA… 7 P4HA2 protein_cod… -1.10 5.69 -6.98 1.57e-7
## 4 4 ENSBTA… 5 TUBA1B protein_cod… -1.03 7.52 -6.96 1.69e-7
## 5 5 ENSBTA… 3 GPX7 protein_cod… -1.01 4.16 -6.66 3.57e-7
## 6 6 ENSBTA… 19 COL1A1 protein_cod… -1.54 11.2 -6.72 3.06e-7
## 7 7 ENSBTA… 14 DGAT1 protein_cod… -1.58 1.87 -6.90 1.95e-7
## 8 8 ENSBTA… 19 METRNL protein_cod… -1.18 4.15 -6.46 6.14e-7
## 9 9 ENSBTA… 29 ANO5 protein_cod… -1.81 6.53 -6.42 6.79e-7
## 10 10 ENSBTA… 1 COL6A1 protein_cod… -1.06 5.19 -6.27 1.01e-6
## # … with 273 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
FBvsHBForhorned_DEG <- read_csv("forebrainvshornbudForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
FBvsHBForhorned_DEG
## # A tibble: 5,228 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 KERA protein_cod… 2.67 2.60 17.4 2.81e-16
## 2 2 ENSBTA… 20 MYO10 protein_cod… -1.73 7.79 -14.5 2.60e-14
## 3 3 ENSBTA… 5 KRT6B protein_cod… -4.13 2.25 -14.1 4.59e-14
## 4 4 ENSBTA… 2 TCEA3 protein_cod… -1.41 4.85 -13.8 8.80e-14
## 5 5 ENSBTA… 19 C1QTNF1 protein_cod… -1.78 2.63 -13.7 1.03e-13
## 6 6 ENSBTA… 29 TENM4 protein_cod… -1.50 8.35 -13.5 1.39e-13
## 7 7 ENSBTA… 6 GABRG1 protein_cod… 2.25 4.12 13.4 1.75e-13
## 8 8 ENSBTA… 4 WNT16 protein_cod… 3.83 1.89 14.0 5.70e-14
## 9 9 ENSBTA… 6 GABRA2 protein_cod… 1.59 5.39 13.2 2.36e-13
## 10 10 ENSBTA… 5 KRT1 protein_cod… -6.07 2.36 -13.5 1.30e-13
## # … with 5,218 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
FBvsHBForpolled_DEG <- read_csv("forebrainvshornbudForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
FBvsHBForpolled_DEG
## # A tibble: 116 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 3 GPX7 protein_cod… -1.33 4.16 -8.00 1.27e-8
## 2 2 ENSBTA… 7 P4HA2 protein_cod… -1.14 5.69 -6.33 8.60e-7
## 3 3 ENSBTA… 3 YBX1 protein_cod… -1.15 6.60 -6.20 1.21e-6
## 4 4 ENSBTA… 5 ATF4 protein_cod… -1.32 5.14 -5.99 2.10e-6
## 5 5 ENSBTA… 17 TMEM119 protein_cod… -1.65 3.71 -6.00 2.02e-6
## 6 6 ENSBTA… 8 HMGB2 protein_cod… -1.16 5.17 -5.96 2.27e-6
## 7 7 ENSBTA… 13 RRBP1 protein_cod… -1.10 6.17 -5.93 2.44e-6
## 8 8 ENSBTA… 19 PYCR1 protein_cod… -1.32 3.68 -5.88 2.80e-6
## 9 9 ENSBTA… 4 not avai… protein_cod… -1.22 4.43 -5.79 3.53e-6
## 10 10 ENSBTA… 1 COL6A1 protein_cod… -1.09 5.19 -5.71 4.36e-6
## # … with 106 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsFBForhorned_DEG <- read_csv("midbrainvsforebrainForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsFBForhorned_DEG
## # A tibble: 97 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 ALX1 protein_cod… -4.02 5.30 -12.6 7.62e-13
## 2 2 ENSBTA… 13 MATN4 protein_cod… 2.77 2.69 9.67 2.64e-10
## 3 3 ENSBTA… 23 COL11A2 protein_cod… 4.24 4.18 9.60 3.08e-10
## 4 4 ENSBTA… 5 NTS protein_cod… -2.98 2.62 -9.02 1.14e- 9
## 5 5 ENSBTA… 2 SATB2 protein_cod… -1.73 7.77 -8.76 2.08e- 9
## 6 6 ENSBTA… 5 EPYC protein_cod… 3.27 3.69 8.46 4.20e- 9
## 7 7 ENSBTA… 13 MYO3A protein_cod… -1.18 6.41 -8.41 4.71e- 9
## 8 8 ENSBTA… 12 CNMD protein_cod… 3.57 4.37 8.35 5.46e- 9
## 9 9 ENSBTA… 2 MATN1 protein_cod… 5.36 2.14 8.29 6.22e- 9
## 10 10 ENSBTA… 3 not avai… protein_cod… -1.48 3.72 -7.92 1.51e- 8
## # … with 87 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsFBForpolled_DEG <- read_csv("midbrainvsforebrainForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsFBForpolled_DEG
## # A tibble: 6 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 COL2A1 protein_cod… 3.21 6.27 7.95 1.41e-8
## 2 2 ENSBTA… 5 ALX1 protein_cod… -2.18 5.30 -7.87 1.72e-8
## 3 3 ENSBTA… 2 SATB2 protein_cod… -1.32 7.77 -6.48 5.77e-7
## 4 4 ENSBTA… 4 SFRP4 protein_cod… -1.18 3.76 -5.72 4.28e-6
## 5 5 ENSBTA… 12 CNMD protein_cod… 2.27 4.37 5.33 1.22e-5
## 6 6 ENSBTA… 23 COL11A2 protein_cod… 2.36 4.18 5.24 1.57e-5
## # … with 2 more variables: adj.P.Val <dbl>, B <dbl>
HBvsFSForhorned_DEG <- read_csv("hornbudvsfrontal_skinForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
HBvsFSForhorned_DEG
## # A tibble: 97 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 9 SMOC2 protein_cod… 1.59 5.57 9.49 3.90e-10
## 2 2 ENSBTA… 5 KRT1 protein_cod… 4.94 2.36 8.98 1.26e- 9
## 3 3 ENSBTA… 23 PLA2G7 protein_cod… 1.88 3.57 8.56 3.34e- 9
## 4 4 ENSBTA… 5 KRT6B protein_cod… 4.47 2.25 8.70 2.38e- 9
## 5 5 ENSBTA… 27 NRG1 protein_cod… -1.40 4.31 -7.91 1.56e- 8
## 6 6 ENSBTA… 5 not avai… protein_cod… 4.49 1.98 8.24 6.97e- 9
## 7 7 ENSBTA… 19 KRT14 protein_cod… 5.18 1.53 8.24 7.10e- 9
## 8 8 ENSBTA… 5 LMO3 protein_cod… -1.49 4.10 -7.35 6.25e- 8
## 9 9 ENSBTA… 10 TGFB3 protein_cod… 1.01 4.04 6.72 3.10e- 7
## 10 10 ENSBTA… 24 CDH7 protein_cod… 1.36 6.19 6.69 3.39e- 7
## # … with 87 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
HBvsFSForpolled_DEG <- read_csv("hornbudvsfrontal_skinForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
HBvsFSForpolled_DEG
## # A tibble: 2 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… X MID1IP1 protein_cod… 1.52 0.118 5.76 3.89e-6
## 2 2 ENSBTA… 8 not avai… protein_cod… 1.45 -0.552 5.95 2.35e-6
## # … with 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsFSForhorned_DEG <- read_csv("midbrainvsfrontal_skinForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsFSForhorned_DEG
## # A tibble: 4,162 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 2 SATB2 protein_cod… -2.97 7.77 -10.1 1.05e-10
## 2 2 ENSBTA… 20 C1QTNF3 protein_cod… 3.08 5.70 9.70 2.46e-10
## 3 3 ENSBTA… 7 GRIA1 protein_cod… 1.75 6.54 9.67 2.63e-10
## 4 4 ENSBTA… 5 ALX1 protein_cod… -3.72 5.30 -9.72 2.37e-10
## 5 5 ENSBTA… 7 ELL2 protein_cod… -2.07 6.33 -9.19 7.74e-10
## 6 6 ENSBTA… 3 not avai… protein_cod… -1.98 3.72 -9.15 8.40e-10
## 7 7 ENSBTA… 6 SGMS2 protein_cod… -2.04 6.26 -9.05 1.05e- 9
## 8 8 ENSBTA… 15 INSC protein_cod… -2.43 6.27 -8.99 1.22e- 9
## 9 9 ENSBTA… 8 GRIN3A protein_cod… 1.75 5.67 8.96 1.30e- 9
## 10 10 ENSBTA… 12 LMO7 protein_cod… -1.59 7.86 -8.76 2.06e- 9
## # … with 4,152 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsFSForpolled_DEG <- read_csv("midbrainvsfrontal_skinForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsFSForpolled_DEG
## # A tibble: 336 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 ALX1 protein_cod… -3.38 5.30 -15.7 3.56e-15
## 2 2 ENSBTA… 2 SATB2 protein_cod… -2.05 7.77 -14.4 2.98e-14
## 3 3 ENSBTA… 8 TNC protein_cod… -1.88 7.99 -13.7 8.99e-14
## 4 4 ENSBTA… 19 COL1A1 protein_cod… -2.20 11.2 -13.5 1.40e-13
## 5 5 ENSBTA… 7 SPARC protein_cod… -1.40 9.63 -13.1 2.73e-13
## 6 6 ENSBTA… 29 ANO5 protein_cod… -2.62 6.53 -12.8 5.00e-13
## 7 7 ENSBTA… 8 CENPP protein_cod… -1.58 7.45 -12.5 8.20e-13
## 8 8 ENSBTA… 7 LOX protein_cod… -1.33 6.06 -12.5 8.74e-13
## 9 9 ENSBTA… 6 SGMS2 protein_cod… -1.38 6.26 -12.1 1.79e-12
## 10 10 ENSBTA… 12 LMO7 protein_cod… -1.09 7.86 -12.0 2.01e-12
## # … with 326 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsHBForhorned_DEG <- read_csv("midbrainvshornbudForhorned-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsHBForhorned_DEG
## # A tibble: 6,295 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 12 LMO7 protein_cod… -1.87 7.86 -14.6 2.25e-14
## 2 2 ENSBTA… 20 MYO10 protein_cod… -1.88 7.79 -13.6 1.12e-13
## 3 3 ENSBTA… 4 WNT2 protein_cod… 1.59 3.81 13.1 2.67e-13
## 4 4 ENSBTA… 2 SATB2 protein_cod… -2.51 7.77 -12.9 4.11e-13
## 5 5 ENSBTA… 6 DKK2 protein_cod… -2.54 6.89 -12.7 5.89e-13
## 6 6 ENSBTA… 5 ALX1 protein_cod… -3.99 5.30 -12.7 5.49e-13
## 7 7 ENSBTA… 15 INSC protein_cod… -2.42 6.27 -12.6 6.75e-13
## 8 8 ENSBTA… 6 GABRA2 protein_cod… 1.78 5.39 12.4 1.03e-12
## 9 9 ENSBTA… 29 FAT3 protein_cod… -1.94 8.86 -12.4 1.10e-12
## 10 10 ENSBTA… 27 UNC5D protein_cod… 1.94 7.35 12.0 2.07e-12
## # … with 6,285 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
MBvsHBForpolled_DEG <- read_csv("midbrainvshornbudForpolled-ARS_jo-DEG_p0.05_lfc1.csv")
MBvsHBForpolled_DEG
## # A tibble: 280 x 11
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 ENSBTA… 5 ALX1 protein_cod… -3.33 5.30 -12.7 6.24e-13
## 2 2 ENSBTA… 3 GPX7 protein_cod… -1.44 4.16 -11.2 1.08e-11
## 3 3 ENSBTA… 2 SATB2 protein_cod… -1.89 7.77 -9.76 2.15e-10
## 4 4 ENSBTA… 8 TNC protein_cod… -1.75 7.99 -9.42 4.59e-10
## 5 5 ENSBTA… 5 TMTC2 protein_cod… -1.56 8.88 -9.34 5.49e-10
## 6 6 ENSBTA… 19 COL1A1 protein_cod… -2.17 11.2 -9.55 3.44e-10
## 7 7 ENSBTA… 1 COL6A1 protein_cod… -1.43 5.19 -9.21 7.35e-10
## 8 8 ENSBTA… 17 TMEM119 protein_cod… -1.96 3.71 -9.27 6.43e-10
## 9 9 ENSBTA… 13 RRBP1 protein_cod… -1.40 6.17 -9.19 7.68e-10
## 10 10 ENSBTA… 12 LMO7 protein_cod… -1.12 7.86 -9.17 8.11e-10
## # … with 270 more rows, and 2 more variables: adj.P.Val <dbl>, B <dbl>
join all Gene results
DEGlist <- list(polledvshornedFS_DEG, polledvshornedHB_DEG, polledvshornedFB_DEG,polledvshornedMB_DEG, FBvsFSForhorned_DEG, FBvsFSForpolled_DEG, FBvsHBForhorned_DEG, FBvsHBForpolled_DEG, MBvsFBForhorned_DEG, MBvsFBForpolled_DEG, HBvsFSForhorned_DEG, HBvsFSForpolled_DEG, MBvsFSForhorned_DEG, MBvsFSForpolled_DEG, MBvsHBForhorned_DEG, MBvsHBForpolled_DEG)%>%
set_names(c("polledvshornedFS_DEG", "polledvshornedHB_DEG", "polledvshornedFB_DEG","polledvshornedMB_DEG", "FBvsFSForhorned_DEG","FBvsFSForpolled_DEG", "FBvsHBForhorned_DEG", "FBvsHBForpolled_DEG", "MBvsFBForhorned_DEG", "MBvsFBForpolled_DEG", "HBvsFSForhorned_DEG", "HBvsFSForpolled_DEG", "MBvsFSForhorned_DEG", "MBvsFSForpolled_DEG", "MBvsHBForhorned_DEG", "MBvsHBForpolled_DEG"))
DEGlist <- lapply(1:length(DEGlist), FUN = function(x, object = DEGlist){
object[[x]]%>%
mutate(Data_comp = rep(names(object[x]), dim(object[[x]])[1]))
})
Heatpmap top 100 Gene results
polledvshorned_ComGene <- Reduce(intersect, list(DEGlist[[1]]$gene_name,DEGlist[[2]]$gene_name,DEGlist[[3]]$gene_name, DEGlist[[4]]$gene_name))[-1]
polledvshorned_ComGenetop100 <- DEGlist%>%do.call("rbind",.)%>%
subset(gene_name %in% polledvshorned_ComGene[2:101])%>%
subset(Data_comp %in% c("polledvshornedFS_DEG", "polledvshornedHB_DEG", "polledvshornedFB_DEG", "polledvshornedMB_DEG"))
ggplot(polledvshorned_ComGenetop100, aes(x=Data_comp, y=gene_name, fill=-qnorm(P.Value))) +
geom_tile() +
scale_fill_viridis_c(name="Standard Z score", option = "B")+
theme(axis.text.x = element_text(angle = 20, vjust = 0.5, hjust=0.3), axis.text.y = element_text(size = 4))
masterDGEList <- read_rds("masterDGEList.rds")
counts <- masterDGEList$counts[polledvshorned_ComGenetop100$gene_id,]
rownames(counts) <- polledvshorned_ComGenetop100$gene_name
sampleinfo<- read.csv("sampleinfo.csv")
paste0(sampleinfo$rna_id,"_",sampleinfo$Phenotype)
## [1] "546FB_horned" "546FS_horned" "546HB_horned" "546MB_horned" "618FB_horned"
## [6] "618FS_horned" "618HB_horned" "618MB_horned" "667FB_polled" "667FS_polled"
## [11] "667HB_polled" "667MB_polled" "668FB_horned" "668FS_horned" "668HB_horned"
## [16] "668MB_horned" "698FB_polled" "698FS_polled" "698HB_polled" "698MB_polled"
## [21] "709FB_polled" "709FS_polled" "709HB_polled" "709MB_polled" "736FB_horned"
## [26] "736FS_horned" "736HB_horned" "736MB_horned"
colnames(counts) <- paste0(sampleinfo$rna_id,"_",sampleinfo$Phenotype)
counts_Forplot <- apply(counts, 2, function(object){object-mean(object)/sd(object)})%>%
reshape2::melt()
counts_Forplot$Phenotype <- stringr::str_extract(counts_Forplot$Var2, "_.{0,10}")
ggplot(counts_Forplot,aes(x=Var2, y=Var1, fill=value)) +
geom_tile() +
facet_grid(~`Phenotype`, scales="free_x") +
scale_fill_viridis_c(name="Standard Z score", option = "B")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.3), axis.text.y = element_text(size = 4))
DEGlist%>%
do.call("rbind",.)%>%
subset(gene_id %in% c("jo_celtic", "jo_mongolian", "jo_mongolian", "jo_est_1", "jo_est_2", "jo_LOC104970777", "jo_LOC112447120", "jo_LOC104970778", "jo_LOC112447121", "jo_LOC112447133", "jo_LOC112447136"))
## # A tibble: 4 x 12
## X1 gene_id chr gene_name gene_biotype logFC AveExpr t P.Value
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4846 jo_LOC… 1 not avai… EST 1.77 0.830 3.48 1.68e-3
## 2 4830 jo_LOC… 1 not avai… EST 1.82 0.830 5.03 2.75e-5
## 3 2953 jo_LOC… 1 not avai… EST 1.21 0.830 3.62 1.18e-3
## 4 4601 jo_LOC… 1 not avai… EST 1.38 0.830 4.17 2.76e-4
## # … with 3 more variables: adj.P.Val <dbl>, B <dbl>, Data_comp <chr>
ALL_GO2gene
ALL_GO2gene <- read_rds("ALL_GO2gene.rds")
PlOT preparation
find Ancestors and data
##-------------------------setAncestors-------------------------------
allAncestorBP <- GOBPANCESTOR %>%
as.list() %>%
magrittr::extract(!is.na(.))
allAncestorCC <- GOCCANCESTOR %>%
as.list() %>%
magrittr::extract(!is.na(.))
allAncestorMF <- GOMFANCESTOR %>%
as.list() %>%
magrittr::extract(!is.na(.))
allParents <- list(
BP = GOBPPARENTS %>%
as.list() %>%
magrittr::extract(!is.na(.)),
CC = GOCCPARENTS %>%
as.list() %>%
magrittr::extract(!is.na(.)),
MF = GOMFPARENTS %>%
as.list() %>%
magrittr::extract(!is.na(.))
)
allChildren <- list(
BP = GOBPCHILDREN %>%
as.list() %>%
magrittr::extract(!is.na(.)),
CC = GOCCCHILDREN %>%
as.list() %>%
magrittr::extract(!is.na(.)),
MF = GOMFCHILDREN %>%
as.list() %>%
magrittr::extract(!is.na(.))
)
##-------------------------GO2gene----------------------------------------
sub_GO2gene <- data.frame(
ensembl_geneID = ALL_GO2gene$ensembl_gene_id,
Gene_name = ALL_GO2gene$external_gene_name,
Gene_description = ALL_GO2gene$description,
GO_term_accession = ALL_GO2gene$go_id)%>%
drop_na()%>%
subset(GO_term_accession %in% GOresults$goid)%>%
left_join(.,GOresults[,-1],by=c("GO_term_accession" = "goid"))%>%
set_colnames(c("ensembl_geneID", "Gene_name", "Gene_description", "GO_term_accession", "GO_term_name", "GO_domain", "N", "DE", "P.DE", "adjP", "Data_comp"))
sub_GO2gene$GO_domain <- gsub("CC", "cellular_component", sub_GO2gene$GO_domain)
sub_GO2gene$GO_domain <- gsub("BP", "biological_process", sub_GO2gene$GO_domain)
sub_GO2gene$GO_domain <- gsub("MF", "molecular_function", sub_GO2gene$GO_domain)
ALL_GO2gene <- data.frame(
ensembl_geneID = ALL_GO2gene$ensembl_gene_id,
Gene_name = ALL_GO2gene$external_gene_name,
Gene_description = ALL_GO2gene$description,
GO_domain = get_names(ALL_GO2gene$go_id)%>%extract2("root_node"),
GO_term_accession = ALL_GO2gene$go_id,
GO_term_name = get_names(ALL_GO2gene$go_id)%>%extract2("go_name"))%>%
drop_na()
Heatpmap all GO results
sub_GO2gene%>%
subset(Data_comp %in% c("polledvshornedFS_GOresults", "polledvshornedHB_GOresults", "polledvshornedFB_GOresults", "polledvshornedMB_GOresults"))%>%
ggplot(aes(x=Data_comp, y=GO_term_name, fill=-qnorm(P.DE))) +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_viridis_c(name="Standard Z score", option = "B")
'%!in%' <- function(x,y)!('%in%'(x,y))
sub_GO2gene%>%
subset(Data_comp %!in% c("polledvshornedFS_GOresults", "polledvshornedHB_GOresults", "polledvshornedFB_GOresults", "polledvshornedMB_GOresults"))%>%
ggplot(aes(x=Data_comp, y=GO_term_name, fill=-qnorm(P.DE))) +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_viridis_c(name="Standard Z score", option = "B")
Set colour plate of the nodes
nCols <- 101
pSeq <- seq(0,10, length.out = nCols)
plotCols <- colorRampPalette(c("#FFFFFF", brewer.pal(7, "YlOrRd")))(nCols + 9)[seq(nCols)]
# Check the color pool for terms
lg <- data_frame(x = seq(nCols) - 1) %>%
ggplot(aes(x = x, y = 1, fill = as.factor(x))) +
geom_raster() +
scale_fill_manual(values = plotCols) +
scale_x_continuous(breaks = quantile(seq(nCols)-1, probs = c(0, 0.5, 1)),
labels = quantile(pSeq, probs= c(0, 0.5, 1)),
expand = c(0, 0))+
scale_y_continuous(expand = c(0, 0)) +
theme_bw() +
guides(fill = FALSE) +
labs(y = c(), x = c(), title = expression(paste(-log[10], "(p)"))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = 10, hjust = 0.5))
lg
function to plot the correlations between GO terms
Make_clusterGOplot <- function(object, Ontology = "biological_process", FONT = 20){
# select the GO terms to plot
Selectgo <- sub_GO2gene%>%
subset(Data_comp %in% object)%>%
subset(GO_domain %in% Ontology)
Selectgo <- Selectgo[,c("GO_term_accession", "GO_term_name", "GO_domain", "P.DE")]%>%
unique()%>%
set_rownames(c())%>%
column_to_rownames("GO_term_accession")
topGO2plot <- sub_GO2gene%>%
subset(Data_comp %in% object)%>%
subset(GO_domain %in% Ontology)%>%
extract2("GO_term_accession")%>%
unique()
# pick ANCESTOR groups
if (length(topGO2plot) > 1){
if (Ontology == "biological_process"){
allBPPlot <- oneGOGraph(topGO2plot[1], GOBPANCESTOR)
for (i in 2:length(topGO2plot)){
allBPPlot %<>% graph::join(oneGOGraph(topGO2plot[i], GOBPANCESTOR))
}
allBPPlot %<>% removeNode("all", .)
}
if (Ontology == "cellular_component"){
allBPPlot <- oneGOGraph(topGO2plot[1], GOCCANCESTOR)
for (i in 2:length(topGO2plot)){
allBPPlot %<>% graph::join(oneGOGraph(topGO2plot[i], GOCCANCESTOR))
}
allBPPlot %<>% removeNode("all", .)
}
if (Ontology == "molecular_function"){
allBPPlot <- oneGOGraph(topGO2plot[1], GOMFANCESTOR)
for (i in 2:length(topGO2plot)){
allBPPlot %<>% graph::join(oneGOGraph(topGO2plot[i], GOMFANCESTOR))
}
allBPPlot %<>% removeNode("all", .)
}
# find the nodes and edges to plot
nodeNames<- allBPPlot %>%
nodeData() %>%
names()
validEdges <- nodeNames %>%
lapply(function(x){
parents <- allParents$BP[[x]]
paste(rep(x, length(parents)), parents, sep="|")
}) %>%
unlist()
rmEdges <- allBPPlot %>%
edgeData() %>%
names %>%
setdiff(validEdges) %>%
strsplit(split = "\\|") %>%
as.data.frame() %>%
t %>%
set_rownames(c())
allBPPlot %<>% removeEdge(rmEdges[,1], rmEdges[,2], .)
# only color the terms in database
setCols <- function(child, ontology = c("BP","MF", "CC"), SigGO = topGO2plot, GOResults = Selectgo ){
if (child %in% topGO2plot) {
termCols <- data.frame(
Term = child,
Ontology = ontology,
p = -log10(GOResults[child,]$P.DE))
p <- termCols$p
termCols <- mutate(termCols,
colGrp = findInterval(p, pSeq))%>%
extract2("colGrp")%>%
set_names(child)
}
else{
termCols <- data.frame(
Term = child,
Ontology = ontology,
p = 0)
p <- termCols$p
termCols <- mutate(termCols,
colGrp = findInterval(p, pSeq))%>%
extract2("colGrp")%>%
set_names(child)
}
}
if (Ontology == "biological_process"){
OT <- "BP"}
if (Ontology == "cellular_component"){
OT <- "CC"}
if (Ontology == "molecular_function"){
OT <- "MF"}
# Color the terms by p-values
termCols <- lapply(nodeNames, setCols, ontology = OT)%>%
unlist()%>%
as.data.frame()
# Set node attributes
nodeAttrs <- makeNodeAttrs(allBPPlot, shape = "ellipse", fillcolor=rgb(1, 1, 1))
nodeNames <- names(nodeAttrs$fillcolor)
nodeAttrs$fillcolor <- termCols[nodeNames,] %>% set_names(nodeNames)
nodeAttrs$fillcolor[is.na(nodeAttrs$fillcolor)] <- 1
nodeAttrs$fillcolor <- plotCols[nodeAttrs$fillcolor] %>% set_names(nodeNames)
nodeAttrs$label <- paste(nodeNames,
str_wrap(Term(nodeNames), 18),
sep = "\n") %>%
set_names(nodeNames)
nodeAttrs$shape[topGO2plot] <- "rectangle"
attrs <- list(node = list(fontsize = FONT,
fixedSize = FALSE),
edge = list(color = "grey70"))
scaleEllipse <- 1.6
goPlot <- allBPPlot %>%
reverseEdgeDirections() %>%
layoutGraph(nodeAttrs = nodeAttrs, attrs = attrs, layoutType = "dot")
graphRI <- graphRenderInfo(goPlot)
graphRI$bbox <- matrix(c(-20, -40, 20, 20), byrow = TRUE, ncol=2) + graphRI$bbox
graphRenderInfo(goPlot) <- graphRI
nodeRI <- nodeRenderInfo(goPlot)
nodeRI$lWidth <- nodeRI$lWidth*scaleEllipse
nodeRI$rWidth <- nodeRI$rWidth*scaleEllipse
nodeRI$height <- nodeRI$height*scaleEllipse
nodeRenderInfo(goPlot) <- nodeRI
PLOT_NAME <-paste0(paste0(object, "_", Ontology),".","pdf")
if (numEdges(goPlot) > 0 && numNodes(goPlot) > 0){
# Open a pdf file
pdf(PLOT_NAME)
# plot in the pdf file
renderGraph(goPlot)
# Close the pdf file
dev.off()
}
else{
WARN <- paste0("No correlated edge between terms", "for ", object)
print(WARN)
}
}
else {
WARN <- paste0(paste0("Not enough GO term in this"," ",Ontology),"for ", object)
print(WARN)
}
}
comp_list <- sub_GO2gene$Data_comp%>%
table()%>%
names()%>%
as.list()
lapply(c(1:length(comp_list)),FUN = function(x){Make_clusterGOplot(object = comp_list[[x]], Ontology = "biological_process",FONT = 20)})
## [1] "Not enough GO term in this biological_processfor polledvshornedFB_GOresults"
## [[1]]
## quartz_off_screen
## 2
##
## [[2]]
## quartz_off_screen
## 2
##
## [[3]]
## quartz_off_screen
## 2
##
## [[4]]
## quartz_off_screen
## 2
##
## [[5]]
## quartz_off_screen
## 2
##
## [[6]]
## quartz_off_screen
## 2
##
## [[7]]
## quartz_off_screen
## 2
##
## [[8]]
## [1] "Not enough GO term in this biological_processfor polledvshornedFB_GOresults"
##
## [[9]]
## quartz_off_screen
## 2
##
## [[10]]
## quartz_off_screen
## 2
##
## [[11]]
## quartz_off_screen
## 2
lapply(c(1:length(comp_list)),FUN = function(x){Make_clusterGOplot(object = comp_list[[x]], Ontology = "cellular_component",FONT = 20)})
## [1] "No correlated edge between termsfor FBvsFSForhorned_GOresults"
## [1] "No correlated edge between termsfor FBvsHBForhorned_GOresults"
## [1] "Not enough GO term in this cellular_componentfor FBvsHBForpolled_GOresults"
## [1] "No correlated edge between termsfor MBvsFSForhorned_GOresults"
## [1] "Not enough GO term in this cellular_componentfor MBvsFSForpolled_GOresults"
## [1] "No correlated edge between termsfor MBvsHBForhorned_GOresults"
## [1] "No correlated edge between termsfor MBvsHBForpolled_GOresults"
## [1] "No correlated edge between termsfor polledvshornedFB_GOresults"
## [1] "No correlated edge between termsfor polledvshornedFS_GOresults"
## [1] "No correlated edge between termsfor polledvshornedHB_GOresults"
## [1] "No correlated edge between termsfor polledvshornedMB_GOresults"
## [[1]]
## [1] "No correlated edge between termsfor FBvsFSForhorned_GOresults"
##
## [[2]]
## [1] "No correlated edge between termsfor FBvsHBForhorned_GOresults"
##
## [[3]]
## [1] "Not enough GO term in this cellular_componentfor FBvsHBForpolled_GOresults"
##
## [[4]]
## [1] "No correlated edge between termsfor MBvsFSForhorned_GOresults"
##
## [[5]]
## [1] "Not enough GO term in this cellular_componentfor MBvsFSForpolled_GOresults"
##
## [[6]]
## [1] "No correlated edge between termsfor MBvsHBForhorned_GOresults"
##
## [[7]]
## [1] "No correlated edge between termsfor MBvsHBForpolled_GOresults"
##
## [[8]]
## [1] "No correlated edge between termsfor polledvshornedFB_GOresults"
##
## [[9]]
## [1] "No correlated edge between termsfor polledvshornedFS_GOresults"
##
## [[10]]
## [1] "No correlated edge between termsfor polledvshornedHB_GOresults"
##
## [[11]]
## [1] "No correlated edge between termsfor polledvshornedMB_GOresults"
lapply(c(1:length(comp_list)),FUN = function(x){Make_clusterGOplot(object = comp_list[[x]], Ontology = "molecular_function",FONT = 20)})
## [1] "Not enough GO term in this molecular_functionfor FBvsFSForhorned_GOresults"
## [1] "Not enough GO term in this molecular_functionfor FBvsHBForhorned_GOresults"
## [1] "No correlated edge between termsfor FBvsHBForpolled_GOresults"
## [1] "Not enough GO term in this molecular_functionfor MBvsFSForhorned_GOresults"
## [1] "Not enough GO term in this molecular_functionfor MBvsFSForpolled_GOresults"
## [1] "Not enough GO term in this molecular_functionfor MBvsHBForhorned_GOresults"
## [1] "No correlated edge between termsfor MBvsHBForpolled_GOresults"
## [1] "Not enough GO term in this molecular_functionfor polledvshornedFB_GOresults"
## [1] "No correlated edge between termsfor polledvshornedFS_GOresults"
## [1] "No correlated edge between termsfor polledvshornedHB_GOresults"
## [1] "No correlated edge between termsfor polledvshornedMB_GOresults"
## [[1]]
## [1] "Not enough GO term in this molecular_functionfor FBvsFSForhorned_GOresults"
##
## [[2]]
## [1] "Not enough GO term in this molecular_functionfor FBvsHBForhorned_GOresults"
##
## [[3]]
## [1] "No correlated edge between termsfor FBvsHBForpolled_GOresults"
##
## [[4]]
## [1] "Not enough GO term in this molecular_functionfor MBvsFSForhorned_GOresults"
##
## [[5]]
## [1] "Not enough GO term in this molecular_functionfor MBvsFSForpolled_GOresults"
##
## [[6]]
## [1] "Not enough GO term in this molecular_functionfor MBvsHBForhorned_GOresults"
##
## [[7]]
## [1] "No correlated edge between termsfor MBvsHBForpolled_GOresults"
##
## [[8]]
## [1] "Not enough GO term in this molecular_functionfor polledvshornedFB_GOresults"
##
## [[9]]
## [1] "No correlated edge between termsfor polledvshornedFS_GOresults"
##
## [[10]]
## [1] "No correlated edge between termsfor polledvshornedHB_GOresults"
##
## [[11]]
## [1] "No correlated edge between termsfor polledvshornedMB_GOresults"