Code/meanings

1 = sox10 positive nuclei

2 = background

3 = sox10 nerve

4 = black background (should be zero)

5 = neg nuclei

count = number of individual areas

area = total number of pixels

DATA WRANGLING

Import and arrange data

s532hb15 <- read.csv("C://IHC/SOX10/s532hb15_Regions_rearranged.csv")
#add meta data and reorder
s532hb15$sample <- "532"
s532hb15$tissue <- "HB"
s532hb15$section <- "15"
s532hb15$genotype <- "pp"
s532hb15$ab<- "sox10"
s532hb15 <- s532hb15[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]


s532hb39 <- read.csv("C://IHC/SOX10/s532hb39_Regions_rearranged.csv")
#add meta data and reorder
s532hb39$sample <- "532"
s532hb39$tissue <- "HB"
s532hb39$section <- "39"
s532hb39$genotype <- "pp"
s532hb39$ab<- "sox10"
s532hb39 <- s532hb39[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s736hb33 <- read.csv("C://IHC/SOX10/s736hb33_Regions_rearranged.csv")
#add meta data and reorder
s736hb33$sample <- "736"
s736hb33$tissue <- "HB"
s736hb33$section <- "33"
s736hb33$genotype <- "pp"
s736hb33$ab<- "sox10"
s736hb33 <- s736hb33[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s736hb23 <- read.csv("C://IHC/SOX10/s736hb23_Regions_rearranged.csv")
#add meta data and reorder
s736hb23$sample <- "736"
s736hb23$tissue <- "HB"
s736hb23$section <- "23"
s736hb23$genotype <- "pp"
s736hb23$ab<- "sox10"
s736hb23 <- s736hb23[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s709hb45 <- read.csv("C://IHC/SOX10/s709hb45_Regions_rearranged.csv")
s709hb45$sample <- "709"
s709hb45$tissue <- "HB"
s709hb45$section <- "45"
s709hb45$genotype <- "PP"
s709hb45$ab<- "sox10"
s709hb45 <- s709hb45[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s709hb35 <- read.csv("C://IHC/SOX10/s709hb35_Regions_rearranged.csv")
s709hb35$sample <- "709"
s709hb35$tissue <- "HB"
s709hb35$section <- "35"
s709hb35$genotype <- "PP"
s709hb35$ab<- "sox10"
s709hb35 <- s709hb35[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s709hb25 <- read.csv("C://IHC/SOX10/s709hb25_Regions_rearranged.csv")
s709hb25$sample <- "709"
s709hb25$tissue <- "HB"
s709hb25$section <- "25"
s709hb25$genotype <- "PP"
s709hb25$ab<- "sox10"
s709hb25 <- s709hb25[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s694fs39 <- read.csv("C://IHC/SOX10/s694fs39_Regions_rearranged.csv")
s694fs39$sample <- "694"
s694fs39$tissue <- "FS"
s694fs39$section <- "39"
s694fs39$genotype <- "PP"
s694fs39$ab<- "sox10"
s694fs39 <- s694fs39[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s694fs19 <- read.csv("C://IHC/SOX10/s694fs19_Regions_rearranged.csv")
s694fs19$sample <- "694"
s694fs19$tissue <- "FS"
s694fs19$section <- "19"
s694fs19$genotype <- "PP"
s694fs19$ab<- "sox10"
s694fs19 <- s694fs19[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s694fs27 <- read.csv("C://IHC/SOX10/s694fs27_Regions_rearranged.csv")
s694fs27$sample <- "694"
s694fs27$tissue <- "FS"
s694fs27$section <- "27"
s694fs27$genotype <- "PP"
s694fs27$ab<- "sox10"
s694fs27 <- s694fs27[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s668hb55 <- read.csv("C://IHC/SOX10/s668hb55_Regions_rearranged.csv")
s668hb55$sample <- "668"
s668hb55$tissue <- "HB"
s668hb55$section <- "55"
s668hb55$genotype <- "pp"
s668hb55$ab<- "sox10"
s668hb55 <- s668hb55[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s668hb45 <- read.csv("C://IHC/SOX10/s668hb45_Regions_rearranged.csv")
s668hb45$sample <- "668"
s668hb45$tissue <- "HB"
s668hb45$section <- "45"
s668hb45$genotype <- "pp"
s668hb45$ab<- "sox10"
s668hb45 <- s668hb45[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s668hb3 <- read.csv("C://IHC/SOX10/s668hb3_Regions_rearranged.csv")
s668hb3$sample <- "668"
s668hb3$tissue <- "HB"
s668hb3$section <- "3"
s668hb3$genotype <- "pp"
s668hb3$ab<- "sox10"
s668hb3 <- s668hb3[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s667hb63 <- read.csv("C://IHC/SOX10/s667hb63_Regions_rearranged.csv")
s667hb63$sample <- "667"
s667hb63$tissue <- "HB"
s667hb63$section <- "63"
s667hb63$genotype <- "PP"
s667hb63$ab<- "sox10"
s667hb63 <- s667hb63[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s667hb29 <- read.csv("C://IHC/SOX10/s667hb29_Regions_rearranged.csv")
s667hb29$sample <- "667"
s667hb29$tissue <- "HB"
s667hb29$section <- "29"
s667hb29$genotype <- "PP"
s667hb29$ab<- "sox10"
s667hb29 <- s667hb29[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s667hb3 <- read.csv("C://IHC/SOX10/s667hb3_Regions_rearranged.csv")
s667hb3$sample <- "667"
s667hb3$tissue <- "HB"
s667hb3$section <- "3"
s667hb3$genotype <- "PP"
s667hb3$ab<- "sox10"
s667hb3 <- s667hb3[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s581hb63 <- read.csv("C://IHC/SOX10/s581hb63_Regions_rearranged.csv")
s581hb63$sample <- "581"
s581hb63$tissue <- "HB"
s581hb63$section <- "63"
s581hb63$genotype <- "pp"
s581hb63$ab<- "sox10"
s581hb63 <- s581hb63[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s581hb53 <- read.csv("C://IHC/SOX10/s581hb53_Regions_rearranged.csv")
s581hb53$sample <- "581"
s581hb53$tissue <- "HB"
s581hb53$section <- "53"
s581hb53$genotype <- "pp"
s581hb53$ab<- "sox10"
s581hb53 <- s581hb53[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]


s581hb43 <- read.csv("C://IHC/SOX10/s581hb43_Regions_rearranged.csv")
s581hb43$sample <- "581"
s581hb43$tissue <- "HB"
s581hb43$section <- "43"
s581hb43$genotype <- "pp"
s581hb43$ab<- "sox10"
s581hb43 <- s581hb43[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]


s546hb63 <- read.csv("C://IHC/SOX10/s546hb63_Regions_rearranged.csv")
s546hb63$sample <- "546"
s546hb63$tissue <- "HB"
s546hb63$section <- "63"
s546hb63$genotype <- "pp"
s546hb63$ab<- "sox10"
s546hb63 <- s546hb63[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]


s546hb57 <- read.csv("C://IHC/SOX10/s546hb57_Regions_rearranged.csv")
s546hb57$sample <- "546"
s546hb57$tissue <- "HB"
s546hb57$section <- "57"
s546hb57$genotype <- "pp"
s546hb57$ab<- "sox10"
s546hb57 <- s546hb57[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

s546hb23 <- read.csv("C://IHC/SOX10/s546hb23_Regions_rearranged.csv")
s546hb23$sample <- "546"
s546hb23$tissue <- "HB"
s546hb23$section <- "23"
s546hb23$genotype <- "pp"
s546hb23$ab<- "sox10"
s546hb23 <- s546hb23[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]


s532hb51 <- read.csv("C://IHC/SOX10/s532hb51_Regions_rearranged.csv")
s532hb51$sample <- "532"
s532hb51$tissue <- "HB"
s532hb51$section <- "51"
s532hb51$genotype <- "pp"
s532hb51$ab<- "sox10"
s532hb51 <- s532hb51[, c(11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)]

raw_data <- rbind(s532hb15, s532hb39, s736hb33, s736hb23, s709hb45, s709hb35, s709hb25, 
                  s694fs39, s694fs27, s694fs19, s668hb55, s668hb45, s668hb3, s667hb63, 
                  s667hb29, s667hb3, s581hb63, s581hb53, s581hb43, s546hb63, s546hb57, 
                  s546hb23, s532hb51) 

write.csv(raw_data, "C://IHC/SOX10/sox10_raw_data.csv")

Checking for Normality of raw data

Conclusion: Not all data is normal

sox10_raw_data <- read.csv("C:/IHC/sox10/sox10_raw_data.csv")
hist(sox10_raw_data$count_1, breaks=7)

qqnorm(sox10_raw_data$count_1)

shapiro.test(sox10_raw_data$count_1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$count_1
## W = 0.82027, p-value = 0.0008235

hist(sox10_raw_data$count_2, breaks=7)

qqnorm(sox10_raw_data$count_2)

shapiro.test(sox10_raw_data$count_2)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$count_2
## W = 0.86017, p-value = 0.004182

hist(sox10_raw_data$count_3, breaks=7)

qqnorm(sox10_raw_data$count_3)

shapiro.test(sox10_raw_data$count_3)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$count_3
## W = 0.93858, p-value = 0.1675

hist(sox10_raw_data$count_5, breaks=7)

qqnorm(sox10_raw_data$count_5)

shapiro.test(sox10_raw_data$count_5)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$count_5
## W = 0.95727, p-value = 0.4105

hist(sox10_raw_data$area_1, breaks=7)

qqnorm(sox10_raw_data$area_1)

shapiro.test(sox10_raw_data$area_1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$area_1
## W = 0.77614, p-value = 0.0001629

hist(sox10_raw_data$area_2, breaks=7)

qqnorm(sox10_raw_data$area_2)

shapiro.test(sox10_raw_data$area_2)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$area_2
## W = 0.94576, p-value = 0.2385

hist(sox10_raw_data$area_3, breaks=7)

qqnorm(sox10_raw_data$area_3)

shapiro.test(sox10_raw_data$area_3)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$area_3
## W = 0.80449, p-value = 0.0004524

hist(sox10_raw_data$area_5, breaks=7)

qqnorm(sox10_raw_data$area_5)

shapiro.test(sox10_raw_data$area_5)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_raw_data$area_5
## W = 0.93045, p-value = 0.1118

box plots

ggboxplot(sox10_raw_data, x = "sample", y = "count_1", fill = "genotype") +
  theme_classic()

ggboxplot(sox10_raw_data, x = "sample", y = "count_3", fill = "genotype") +
  theme_classic()

ggboxplot(sox10_raw_data, x = "sample", y = "count_5", fill = "genotype") +
  theme_classic()

ggboxplot(sox10_raw_data, x = "sample", y = "area_1", fill = "genotype") +
  theme_classic()

ggboxplot(sox10_raw_data, x = "sample", y = "area_3", fill = "genotype") +
  theme_classic()

ggboxplot(sox10_raw_data, x = "sample", y = "area_5", fill = "genotype") +
  theme_classic()

averaging

1 = sox10+ nuclei
2 = background
3 = sox10 nerve
4 = black background
5 = neg nuclei

#bind data from the same sample
s532HB <-rbind(s532hb15, 
             s532hb39, 
             s532hb51)
#calculate the average and standard deviation of each measure
mean_532HB <- data.frame (sample = "532HB",
                          genotype = "pp",
                          ab = "sox10",
                          count_1 = mean(s532HB$count_1),
                          count_2 = mean(s532HB$count_2),
                          count_3 = mean(s532HB$count_3),
                          count_4 = mean(s532HB$count_4),
                          count_5 = mean(s532HB$count_5),
                          area_1 = mean(s532HB$area_1),
                          area_2 = mean(s532HB$area_2),
                          area_3 = mean(s532HB$area_3),
                          area_4 = mean(s532HB$area_4),
                          area_5 = mean(s532HB$area_5),
                          sd_count_1 = sd(s532HB$count_1),
                          sd_count_2 = sd(s532HB$count_2),
                          sd_count_3 = sd(s532HB$count_3),
                          sd_count_4 = sd(s532HB$count_4),
                          sd_count_5 = sd(s532HB$count_5),
                          sd_area_1 = sd(s532HB$area_1),
                          sd_area_2 = sd(s532HB$area_2),
                          sd_area_3 = sd(s532HB$area_3),
                          sd_area_4 = sd(s532HB$area_4),
                          sd_area_5 = sd(s532HB$area_5))


s546HB <-rbind(s546hb23, 
             s546hb57, 
             s546hb63)

mean_546HB <- data.frame (sample = "546HB",
                          genotype = "pp",
                          ab = "sox10",
                          count_1 = mean(s546HB$count_1),
                          count_2 = mean(s546HB$count_2),
                          count_3 = mean(s546HB$count_3),
                          count_4 = mean(s546HB$count_4),
                          count_5 = mean(s546HB$count_5),
                          area_1 = mean(s546HB$area_1),
                          area_2 = mean(s546HB$area_2),
                          area_3 = mean(s546HB$area_3),
                          area_4 = mean(s546HB$area_4),
                          area_5 = mean(s546HB$area_5),
                          sd_count_1 = sd(s546HB$count_1),
                          sd_count_2 = sd(s546HB$count_2),
                          sd_count_3 = sd(s546HB$count_3),
                          sd_count_4 = sd(s546HB$count_4),
                          sd_count_5 = sd(s546HB$count_5),
                          sd_area_1 = sd(s546HB$area_1),
                          sd_area_2 = sd(s546HB$area_2),
                          sd_area_3 = sd(s546HB$area_3),
                          sd_area_4 = sd(s546HB$area_4),
                          sd_area_5 = sd(s546HB$area_5))


s581HB <-rbind(s581hb43, 
             s581hb53, 
             s581hb63)

mean_581HB <- data.frame (sample = "581HB",
                          genotype = "pp",
                          ab = "sox10",
                          count_1 = mean(s581HB$count_1),
                          count_2 = mean(s581HB$count_2),
                          count_3 = mean(s581HB$count_3),
                          count_4 = mean(s581HB$count_4),
                          count_5 = mean(s581HB$count_5),
                          area_1 = mean(s581HB$area_1),
                          area_2 = mean(s581HB$area_2),
                          area_3 = mean(s581HB$area_3),
                          area_4 = mean(s581HB$area_4),
                          area_5 = mean(s581HB$area_5),
                          sd_count_1 = sd(s581HB$count_1),
                          sd_count_2 = sd(s581HB$count_2),
                          sd_count_3 = sd(s581HB$count_3),
                          sd_count_4 = sd(s581HB$count_4),
                          sd_count_5 = sd(s581HB$count_5),
                          sd_area_1 = sd(s581HB$area_1),
                          sd_area_2 = sd(s581HB$area_2),
                          sd_area_3 = sd(s581HB$area_3),
                          sd_area_4 = sd(s581HB$area_4),
                          sd_area_5 = sd(s581HB$area_5))
             
s667HB <-rbind(s667hb29,
             s667hb3,
             s667hb63)

mean_667HB <- data.frame (sample = "667HB",
                          genotype = "PP",
                          ab = "sox10",
                          count_1 = mean(s667HB$count_1),
                          count_2 = mean(s667HB$count_2),
                          count_3 = mean(s667HB$count_3),
                          count_4 = mean(s667HB$count_4),
                          count_5 = mean(s667HB$count_5),
                          area_1 = mean(s667HB$area_1),
                          area_2 = mean(s667HB$area_2),
                          area_3 = mean(s667HB$area_3),
                          area_4 = mean(s667HB$area_4),
                          area_5 = mean(s667HB$area_5),
                          sd_count_1 = sd(s667HB$count_1),
                          sd_count_2 = sd(s667HB$count_2),
                          sd_count_3 = sd(s667HB$count_3),
                          sd_count_4 = sd(s667HB$count_4),
                          sd_count_5 = sd(s667HB$count_5),
                          sd_area_1 = sd(s667HB$area_1),
                          sd_area_2 = sd(s667HB$area_2),
                          sd_area_3 = sd(s667HB$area_3),
                          sd_area_4 = sd(s667HB$area_4),
                          sd_area_5 = sd(s667HB$area_5))



s668HB <-rbind(s668hb3,
             s668hb45,
             s668hb55)

mean_668HB <- data.frame (sample = "668HB",
                          genotype = "pp",
                          ab = "sox10",
                          count_1 = mean(s668HB$count_1),
                          count_2 = mean(s668HB$count_2),
                          count_3 = mean(s668HB$count_3),
                          count_4 = mean(s668HB$count_4),
                          count_5 = mean(s668HB$count_5),
                          area_1 = mean(s668HB$area_1),
                          area_2 = mean(s668HB$area_2),
                          area_3 = mean(s668HB$area_3),
                          area_4 = mean(s668HB$area_4),
                          area_5 = mean(s668HB$area_5),
                          sd_count_1 = sd(s668HB$count_1),
                          sd_count_2 = sd(s668HB$count_2),
                          sd_count_3 = sd(s668HB$count_3),
                          sd_count_4 = sd(s668HB$count_4),
                          sd_count_5 = sd(s668HB$count_5),
                          sd_area_1 = sd(s668HB$area_1),
                          sd_area_2 = sd(s668HB$area_2),
                          sd_area_3 = sd(s668HB$area_3),
                          sd_area_4 = sd(s668HB$area_4),
                          sd_area_5 = sd(s668HB$area_5))

s694FS <-rbind(s694fs27,
             s694fs39, s694fs19)

mean_694FS <- data.frame (sample = "694FS",
                          genotype = "PP",
                          ab = "sox10",
                          count_1 = mean(s694FS$count_1),
                          count_2 = mean(s694FS$count_2),
                          count_3 = mean(s694FS$count_3),
                          count_4 = mean(s694FS$count_4),
                          count_5 = mean(s694FS$count_5),
                          area_1 = mean(s694FS$area_1),
                          area_2 = mean(s694FS$area_2),
                          area_3 = mean(s694FS$area_3),
                          area_4 = mean(s694FS$area_4),
                          area_5 = mean(s694FS$area_5),
                          sd_count_1 = sd(s694FS$count_1),
                          sd_count_2 = sd(s694FS$count_2),
                          sd_count_3 = sd(s694FS$count_3),
                          sd_count_4 = sd(s694FS$count_4),
                          sd_count_5 = sd(s694FS$count_5),
                          sd_area_1 = sd(s694FS$area_1),
                          sd_area_2 = sd(s694FS$area_2),
                          sd_area_3 = sd(s694FS$area_3),
                          sd_area_4 = sd(s694FS$area_4),
                          sd_area_5 = sd(s694FS$area_5))

s709HB <-rbind(s709hb25,
             s709hb35,
             s709hb45)

mean_709HB <- data.frame (sample = "709HB",
                          genotype = "PP",
                          ab = "sox10",
                          count_1 = mean(s709HB$count_1),
                          count_2 = mean(s709HB$count_2),
                          count_3 = mean(s709HB$count_3),
                          count_4 = mean(s709HB$count_4),
                          count_5 = mean(s709HB$count_5),
                          area_1 = mean(s709HB$area_1),
                          area_2 = mean(s709HB$area_2),
                          area_3 = mean(s709HB$area_3),
                          area_4 = mean(s709HB$area_4),
                          area_5 = mean(s709HB$area_5),
                          sd_count_1 = sd(s709HB$count_1),
                          sd_count_2 = sd(s709HB$count_2),
                          sd_count_3 = sd(s709HB$count_3),
                          sd_count_4 = sd(s709HB$count_4),
                          sd_count_5 = sd(s709HB$count_5),
                          sd_area_1 = sd(s709HB$area_1),
                          sd_area_2 = sd(s709HB$area_2),
                          sd_area_3 = sd(s709HB$area_3),
                          sd_area_4 = sd(s709HB$area_4),
                          sd_area_5 = sd(s709HB$area_5))

s736HB <-rbind(s736hb23,
             s736hb33)

mean_736HB <- data.frame (sample = "736HB",
                          genotype = "pp",
                          ab = "sox10",
                          count_1 = mean(s736HB$count_1),
                          count_2 = mean(s736HB$count_2),
                          count_3 = mean(s736HB$count_3),
                          count_4 = mean(s736HB$count_4),
                          count_5 = mean(s736HB$count_5),
                          area_1 = mean(s736HB$area_1),
                          area_2 = mean(s736HB$area_2),
                          area_3 = mean(s736HB$area_3),
                          area_4 = mean(s736HB$area_4),
                          area_5 = mean(s736HB$area_5),
                          sd_count_1 = sd(s736HB$count_1),
                          sd_count_2 = sd(s736HB$count_2),
                          sd_count_3 = sd(s736HB$count_3),
                          sd_count_4 = sd(s736HB$count_4),
                          sd_count_5 = sd(s736HB$count_5),
                          sd_area_1 = sd(s736HB$area_1),
                          sd_area_2 = sd(s736HB$area_2),
                          sd_area_3 = sd(s736HB$area_3),
                          sd_area_4 = sd(s736HB$area_4),
                          sd_area_5 = sd(s736HB$area_5))

#data1 combines individual samples in a table
sox10_meanforsample <-rbind(mean_532HB, mean_546HB, mean_581HB, mean_736HB, mean_668HB, mean_694FS, mean_709HB, mean_667HB)

Checking for normality of data averaged per sample.

Conclusion: Data is normal as p-value is >0.05

head(sox10_meanforsample)

##   sample genotype    ab  count_1  count_2   count_3   count_4   count_5
## 1  532HB       pp sox10 26.33333 57.66667 41.333333 0.0000000 1319.6667
## 2  546HB       pp sox10 39.66667 66.33333 23.666667 0.0000000 1216.6667
## 3  581HB       pp sox10 16.00000 39.33333  8.333333 0.3333333  957.6667
## 4  736HB       pp sox10 36.00000 55.00000 26.000000 0.0000000 1251.5000
## 5  668HB       pp sox10 21.00000 59.66667 22.000000 0.0000000  942.6667
## 6  694FS       PP sox10 24.00000 39.66667 33.333333 0.3333333 1319.6667
##     area_1   area_2    area_3   area_4    area_5 sd_count_1 sd_count_2
## 1 13884.67  8173307 202956.67   0.0000 1027540.3  11.930353  14.571662
## 2 23213.33  7944256 197378.33   0.0000  756040.7  36.143234  65.431898
## 3 10495.33  6927733  47399.67 253.0000  701908.0   8.544004  19.756855
## 4 20250.00  7155650 145095.50   0.0000  842462.0  18.384776   9.899495
## 5 13291.33  6413436 119001.33   0.0000  966478.7   2.000000   7.023769
## 6 12873.67 11791826  75714.00 177.6667  986681.7  14.000000  24.214321
##   sd_count_3 sd_count_4 sd_count_5 sd_area_1 sd_area_2  sd_area_3 sd_area_4
## 1   7.023769  0.0000000   267.2870  8454.441  892971.8  27401.060    0.0000
## 2  16.502525  0.0000000   236.1807 24197.188 1130412.1 204291.550    0.0000
## 3   2.886751  0.5773503   353.6444  4927.145 1333486.3  18889.690  438.2089
## 4   4.242641  0.0000000   236.8808 10019.703 1370597.8   2284.662    0.0000
## 5   6.082763  0.0000000   223.7238  3125.982  525266.4  22058.077    0.0000
## 6  12.220202  0.5773503   323.5372  7482.734 1277686.4  48971.802  307.7277
##   sd_area_5
## 1  575006.5
## 2  184090.2
## 3  193246.8
## 4  242932.2
## 5  288018.1
## 6  346247.0

hist(sox10_meanforsample$count_1, breaks=5)

qqnorm(sox10_meanforsample$count_1)

shapiro.test(sox10_meanforsample$count_1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$count_1
## W = 0.92966, p-value = 0.513

hist(sox10_meanforsample$count_2, breaks=5)

qqnorm(sox10_meanforsample$count_2)

shapiro.test(sox10_meanforsample$count_2)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$count_2
## W = 0.92192, p-value = 0.4457

hist(sox10_meanforsample$count_3, breaks=5)

qqnorm(sox10_meanforsample$count_3)

shapiro.test(sox10_meanforsample$count_3)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$count_3
## W = 0.97461, p-value = 0.9315

hist(sox10_meanforsample$count_5, breaks=5)

qqnorm(sox10_meanforsample$count_5)

shapiro.test(sox10_meanforsample$count_5)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$count_5
## W = 0.89619, p-value = 0.2669

hist(sox10_meanforsample$area_1, breaks=5)

qqnorm(sox10_meanforsample$area_1)

shapiro.test(sox10_meanforsample$area_1)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$area_1
## W = 0.90703, p-value = 0.3337

hist(sox10_meanforsample$area_2, breaks=5)

qqnorm(sox10_meanforsample$area_2)

shapiro.test(sox10_meanforsample$area_2)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$area_2
## W = 0.90174, p-value = 0.2995

hist(sox10_meanforsample$area_3, breaks=5)

qqnorm(sox10_meanforsample$area_3)

shapiro.test(sox10_meanforsample$area_3)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$area_3
## W = 0.87871, p-value = 0.183

hist(sox10_meanforsample$area_5, breaks=5)

qqnorm(sox10_meanforsample$area_5)

shapiro.test(sox10_meanforsample$area_5)

## 
##  Shapiro-Wilk normality test
## 
## data:  sox10_meanforsample$area_5
## W = 0.92752, p-value = 0.4938

Distribution of data

ggboxplot(sox10_meanforsample, x = "genotype", y = "pTA_1", fill = "genotype") +
  theme_classic() + 
  labs(y = "SOX10 nuclei area/total area (proportion)")

ggboxplot(sox10_meanforsample, x = "genotype", y = "pTA_3", fill = "genotype") +
  theme_classic()  + 
  labs(y = "SOX10 nerve area/total area (proportion)")

ggboxplot(sox10_meanforsample, x = "genotype", y = "posTA", fill = "genotype") +
  theme_classic()  + 
  labs(y = "Total SOX10 area/total area (proportion)")

ggboxplot(sox10_meanforsample, x = "genotype", y = "pTA_5", fill = "genotype") +
  theme_classic()   + 
  labs(y = "unstained nuclei area/total area (proportion)")

hsum <- subset(sox10_meanforsample, genotype=="horned")
psum <- subset(sox10_meanforsample, genotype=="polled")


write.csv(hsum, "hsum.csv", row.names = FALSE)
write.csv(psum, "psum.csv", row.names = FALSE)

#Column graphs
hsum$posTA_percent <- hsum$posTA*100
hsum$posTA_percent <- hsum$pTA_1*100
hsum$posTA_percent <- hsum$pTA_3*100
psum$posTA_percent <- psum$posTA*100
psum$posTA_percent <- psum$pTA_1*100
psum$posTA_percent <- psum$pTA_3*100

#calculate values for column graph 1
hcol_one <- group_by(hsum, genotype) %>% 
  summarise(
    mean = mean(posTA, na.rm = TRUE),
    sd = sd(posTA, na.rm = TRUE)
  )
hcol_one$genotype <- "hornedHB"
hcol_one$n <- 5
hcol_one$se <- hcol_one$sd/sqrt(hcol_one$n)


pcol_one <- group_by(psum, genotype) %>%
  summarise(
    mean = mean(posTA, na.rm = TRUE),
    sd = sd(posTA, na.rm = TRUE)
  )
pcol_one$genotype <- "polledHB+FS"
pcol_one$n <- 3
pcol_one$se <- pcol_one$sd/sqrt(pcol_one$n)

col_one <- rbind(hcol_one, pcol_one)

#calculate values for column graph 2
hcol_two <- group_by(hsum, genotype) %>% 
  summarise(
    mean = mean(pTA_1, na.rm = TRUE),
    sd = sd(pTA_1, na.rm = TRUE)
  )
hcol_two$genotype <- "hornedHB"
hcol_two$n <- 5
hcol_two$se <- hcol_two$sd/sqrt(hcol_two$n)


pcol_two <- group_by(psum, genotype) %>%
  summarise(
    mean = mean(pTA_1, na.rm = TRUE),
    sd = sd(pTA_1, na.rm = TRUE)
  )
pcol_two$genotype <- "polledHB+FS"
pcol_two$n <- 3
pcol_two$se <- pcol_two$sd/sqrt(pcol_two$n)

sox10_col_two <- rbind(hcol_two, pcol_two)

#calculate values for column graph 2
hcol_three <- group_by(hsum, genotype) %>% 
  summarise(
    mean = mean(pTA_3, na.rm = TRUE),
    sd = sd(pTA_3, na.rm = TRUE)
  )
hcol_three$genotype <- "hornedHB"
hcol_three$n <- 5
hcol_three$se <- hcol_three$sd/sqrt(hcol_three$n)


pcol_three <- group_by(psum, genotype) %>%
  summarise(
    mean = mean(pTA_3, na.rm = TRUE),
    sd = sd(pTA_3, na.rm = TRUE)
  )
pcol_three$genotype <- "polledHB+FS"
pcol_three$n <- 3
pcol_three$se <- pcol_three$sd/sqrt(pcol_three$n)

sox10_col_three <- rbind(hcol_three, pcol_three)

#Total sox10 area
ggplot(col_one, aes(x=genotype, y=mean))+ 
  geom_col(width = 0.7, fill = "steelblue")+
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.2,
                                                    position=position_dodge(.9)) +
  labs(x = "Sample", y = "% area SOX10 positive")+
  ylim(0,0.03)+
  theme_light(base_size = 18)

#SOX10 nuclei
ggplot(sox10_col_two, aes(x=genotype, y=mean))+ 
  geom_col(width = 0.7, fill = "steelblue")+
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.2,
                                                    position=position_dodge(.9)) +
  labs(x = "Sample", y = "% area SOX10 positive nuclei")+
  ylim(0,0.003)+
  theme_light(base_size = 18)

#SOX10 nerves
ggplot(sox10_col_three, aes(x=genotype, y=mean))+ 
  geom_col(width = 0.7, fill = "steelblue")+
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.2,
                                                    position=position_dodge(.9)) +
  labs(x = "Sample", y = "% area SOX10 positive nerves")+
  ylim(0,0.03)+
  theme_light(base_size = 18)

horn <-rbind(mean_532HB, mean_546HB,  mean_581HB, mean_668HB,  mean_736HB)
head(horn)

##   sample genotype    ab  count_1  count_2   count_3   count_4   count_5
## 1  532HB       pp sox10 26.33333 57.66667 41.333333 0.0000000 1319.6667
## 2  546HB       pp sox10 39.66667 66.33333 23.666667 0.0000000 1216.6667
## 3  581HB       pp sox10 16.00000 39.33333  8.333333 0.3333333  957.6667
## 4  668HB       pp sox10 21.00000 59.66667 22.000000 0.0000000  942.6667
## 5  736HB       pp sox10 36.00000 55.00000 26.000000 0.0000000 1251.5000
##     area_1  area_2    area_3 area_4    area_5 sd_count_1 sd_count_2 sd_count_3
## 1 13884.67 8173307 202956.67      0 1027540.3  11.930353  14.571662   7.023769
## 2 23213.33 7944256 197378.33      0  756040.7  36.143234  65.431898  16.502525
## 3 10495.33 6927733  47399.67    253  701908.0   8.544004  19.756855   2.886751
## 4 13291.33 6413436 119001.33      0  966478.7   2.000000   7.023769   6.082763
## 5 20250.00 7155650 145095.50      0  842462.0  18.384776   9.899495   4.242641
##   sd_count_4 sd_count_5 sd_area_1 sd_area_2  sd_area_3 sd_area_4 sd_area_5
## 1  0.0000000   267.2870  8454.441  892971.8  27401.060    0.0000  575006.5
## 2  0.0000000   236.1807 24197.188 1130412.1 204291.550    0.0000  184090.2
## 3  0.5773503   353.6444  4927.145 1333486.3  18889.690  438.2089  193246.8
## 4  0.0000000   223.7238  3125.982  525266.4  22058.077    0.0000  288018.1
## 5  0.0000000   236.8808 10019.703 1370597.8   2284.662    0.0000  242932.2

#Comparison #1 - positive nuclei
horn$A1 <- horn$area_1 #positive pixels
horn$A2 <- horn$area_2 + horn$area_3  + horn$area_5 #calculate negative pixels

#Comparison #2 - positive nerve
horn$B1 <- horn$area_3 #positive pixels
horn$B2 <- horn$area_2 + horn$area_1  + horn$area_5 #calculate negative pixels

#Comparison #3 - positive nuclei + nerve
horn$C1 <- horn$area_1 + horn$area_3 #positive pixels
horn$C2 <- horn$area_2 + horn$area_5 #calculate negative pixels

horn_means <- as.data.frame(round(colMeans(horn[, c(24, 25, 26, 27, 28, 29)]))) # find mean
horn_means

##    round(colMeans(horn[, c(24, 25, 26, 27, 28, 29)]))
## A1                                              16227
## A2                                            8324129
## B1                                             142366
## B2                                            8197989
## C1                                             158593
## C2                                            8181762

poll <- rbind(mean_694FS, mean_709HB)

#Comparison #1 - positive nuclei
poll$A1 <- poll$area_1 #positive pixels
poll$A2 <- poll$area_2 + poll$area_3  + poll$area_5 #calculate negative pixels

#Comparison #2 - positive nerve
poll$B1 <- poll$area_3 #positive pixels
poll$B2 <- poll$area_2 + poll$area_1  + poll$area_5 #calculate negative pixels

#Comparison #3 - positive nuclei + nerve
poll$C1 <- poll$area_1 + poll$area_3 #positive pixels
poll$C2 <- poll$area_2 + poll$area_5 #calculate negative pixels


polled_means <- as.data.frame(round(colMeans(poll[, c(24, 25, 26, 27, 28, 29)])))
polled_means

##    round(colMeans(poll[, c(24, 25, 26, 27, 28, 29)]))
## A1                                              11875
## A2                                           10198106
## B1                                              56041
## B2                                           10153940
## C1                                              67916
## C2                                           10142065

#create new data frame with following format for each variable
#     |   +   |   -   |   Total
# pp  |       |       |   
# PP  |       |       | 
#Total|       |       |

COMPARISON 1

dat <- data.frame(
  "Positive" = c(horn_means[1,], polled_means[1,]), #change these values
  "Negative" = c(horn_means[2,], polled_means[2,]),
  row.names = c("Horned", "Polled"),
  stringsAsFactors = FALSE)

dat

##        Positive Negative
## Horned    16227  8324129
## Polled    11875 10198106

mosaicplot(dat,
           main = "Mosaic plot",
           color = TRUE)

test <- fisher.test(dat)
test

## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.634784 1.714350
## sample estimates:
## odds ratio 
##   1.674091

test$p.value

## [1] 0

COMPARISON 2

dat2 <- data.frame(
  "Positive" = c(horn_means[3,], polled_means[3,]),
  "Negative" = c(horn_means[4,], polled_means[4,]),
  row.names = c("Horned", "Polled"),
  stringsAsFactors = FALSE)

dat2

##        Positive Negative
## Horned   142366  8197989
## Polled    56041 10153940

write.csv(dat2, "C:/IHC/sox10_nerve")

mosaicplot(dat2,
           main = "Mosaic plot",
           color = TRUE)

test2 <- fisher.test(dat2)
test2

## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat2
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  3.115836 3.177832
## sample estimates:
## odds ratio 
##     3.1465

test2$p.value

## [1] 0

COMPARISON 3

dat3 <- data.frame(
  "Positive" = c(horn_means[5,], polled_means[5,]), #change these values
  "Negative" = c(horn_means[6,], polled_means[6,]),
  row.names = c("Horned", "Polled"),
  stringsAsFactors = FALSE)

dat3

##        Positive Negative
## Horned   158593  8181762
## Polled    67916 10142065

mosaicplot(dat3,
           main = "Mosaic plot",
           color = TRUE)

test3 <- fisher.test(dat)
test3

## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.634784 1.714350
## sample estimates:
## odds ratio 
##   1.674091

test3$p.value

## [1] 0

For all three comparisons, the horned tissue significantly differs from the polled tissue.

IHC-sox10-stats

J Aldersey

28/01/2022