10 Designing Final Figures

10.1 Alex

# ------------------------------------------------------------------------------
# Alex's Survival Data Analysis with Workshop
# Alex Lewis
#----------------------------------------------------------------------------

#Load Libraries
require(tidyverse)
require(drc)
# Loading required package: drc
# Loading required package: MASS
# 
# Attaching package: 'MASS'
# The following object is masked from 'package:dplyr':
# 
#     select
# 
# 'drc' has been loaded.
# Please cite R and 'drc' if used for a publication,
# for references type 'citation()' and 'citation('drc')'.
# 
# Attaching package: 'drc'
# The following objects are masked from 'package:stats':
# 
#     gaussian, getInitial

# Load data
surv <- read_csv(here::here("student_data/raw_survival_alex_25JUL2022.csv"))
# Rows: 101 Columns: 6
# ── Column specification ────────────────────────────────────────────────────────
# Delimiter: ","
# chr (2): Genotype, Shock_Date
# dbl (4): Temperature, Eggs, Hatched_24, Hatched_48
# 
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lock_surv <- read_delim(here::here("student_data/raw_survival_lockwood_et_al_2018.txt"),
                        delim = "\t") %>%
  dplyr::rename(Genotype = genotype,
         Eggs = eggs,
         Hatched_48 = hatched,
         Temperature = temperature) 
# Rows: 933 Columns: 6
# ── Column specification ────────────────────────────────────────────────────────
# Delimiter: "\t"
# chr (2): genotype, region
# dbl (4): temperature, eggs, hatched, survival
# 
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Combine data
surv <- bind_rows(surv, lock_surv)

# LT50-----------------------------------------------------------------------
lt_surv_curv <- surv %>%
  filter(!str_detect(Genotype, "Canton")) %>%
  group_by(Genotype) %>%
  nest() %>%
  mutate(fit = map(data, ~ drm(Hatched_48/Eggs ~ Temperature, 
                               data = .x, 
                               weight = Eggs,
                               fct = LL.3(names = c("slope", 
                                                    "upper limit", 
                                                    "LT50")),
                               type = "binomial")),
         lt_s = map(fit, ~ ED(.x, 
                              c(10, 50, 90), 
                              interval = "delta",
                              display = FALSE))) %>%
  mutate(tidy_fit = map(fit, broom::tidy))  %>%
  mutate(lt_10 = lt_s[[1]][[1]],
         lt_50 = lt_s[[1]][[2]],
         lt_90 = lt_s[[1]][[3]])

# Calcu.ate mean and sd from locale groups
lt_surv_curv %>%
  filter(!str_detect(Genotype, "X")) %>%
  mutate(locale = case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                            str_detect(Genotype, "VT") |
                              str_detect(Genotype, "BEA") |
                              str_detect(Genotype, "RFM") ~ "Temperate",
                            str_detect(Genotype, "JP") ~ "Japan",
                            str_detect(Genotype, "FR") ~ "France",
                            TRUE ~ "Tropical")) %>%
  group_by(locale) %>%
  summarise(lt50_mean = mean(lt_50),
            lt50_sd = sd(lt_50),
            count = n()) %>%
  mutate(lt50_se = lt50_sd/sqrt(count),
         lt50_95ci = lt50_se*1.96)
# # A tibble: 5 × 6
#   locale    lt50_mean lt50_sd count lt50_se lt50_95ci
#   <chr>         <dbl>   <dbl> <int>   <dbl>     <dbl>
# 1 Florida        34.9   0.268     6   0.109     0.214
# 2 France         35.0   0.511     2   0.361     0.708
# 3 Japan          34.4   0.387     2   0.274     0.537
# 4 Temperate      34.9   0.449    19   0.103     0.202
# 5 Tropical       35.9   0.414     5   0.185     0.363

lt_surv_curv
# # A tibble: 36 × 8
# # Groups:   Genotype [36]
#    Genotype data             fit    lt_s          tidy_fit lt_10 lt_50 lt_90
#    <chr>    <list>           <list> <list>        <list>   <dbl> <dbl> <dbl>
#  1 FLCK 02  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.6  34.9  36.2
#  2 FLCK 03  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.4  34.5  35.7
#  3 FLCK 05  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.7  34.7  35.7
#  4 FLCK 06  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  34.7  35.3  35.8
#  5 FLCK 08  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.9  34.9  35.9
#  6 FLCK 09  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  34.3  35.1  35.9
#  7 JPOK     <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.4  34.1  34.9
#  8 JPSC     <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.7  34.7  35.7
#  9 FRMO-01  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  33.8  34.7  35.5
# 10 FRMO-02  <tibble [8 × 7]> <drc>  <dbl [3 × 4]> <tibble>  34.8  35.4  36.0
# # … with 26 more rows


# Plot survival curve for one genotype (FLCK)------------------------------------------
surv %>%
  filter(str_detect(Genotype, "FLCK")) %>%
  mutate(Prop_Hatched = Hatched_48/Eggs) %>%
  ggplot(aes(x = Temperature,
             y = Prop_Hatched,
             color = Genotype,
             fill = Genotype)) +
  geom_jitter(width = 0.1,
              shape = 21,
              alpha = 0.6,
              size = 2,
              color = "grey23") +
  geom_smooth(method = drm,
              method.args = list(fct = LL.3()),
              se = FALSE) +
  geom_hline(yintercept = 0.5,
             color = "grey50",
             linetype = 2) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  scale_x_continuous(limits = c(24, 37),
                     breaks = seq(from = 24, to = 38, by = 2)) +
  theme_classic(base_size = 16) +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'


# ggsave("flck_final.png", 
#        height = 6, 
#        width = 9)



# Renormalize to 25°C hatching success ----------------------------------------
surv %>%
  filter(str_detect(Genotype, "FLCK")) %>%
  group_by(Genotype, Temperature) %>%
  summarise(Prop_Hatched = sum(Hatched_48)/sum(Eggs)) %>%
  mutate(Prop_Hatched_norm = Prop_Hatched/Prop_Hatched[Temperature == 25]) %>%
  ggplot(aes(x = Temperature,
             y = Prop_Hatched_norm,
             color = Genotype)) +
  geom_smooth(method = drm,
              method.args = list(fct = LL.3()),
              se = FALSE) +
  geom_hline(yintercept = 0.5,
             color = "grey50",
             linetype = 2) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Hatched at 48 hours\n(Normalized to 25°C Hatching Success)") +
  ylim(c(0, 1)) +
  theme_minimal()
# `summarise()` has grouped output by 'Genotype'. You can override using the
# `.groups` argument.
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 6 rows containing non-finite values (stat_smooth).
# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning: Removed 4 rows containing missing values (geom_smooth).




#Combined surv curve-----------------------------------------------------------
surv %>%
  mutate(region = ifelse(is.na (region),
                         case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                                   str_detect(Genotype, "FR") ~ "France",
                                   str_detect(Genotype, "JP") ~ "Japan"),
                         str_to_sentence(region))) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "Japan", "France", "Florida"))) %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X")) %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = region)) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_color_manual(values = c("skyblue",
                                "pink",
                                "green",
                                "orange",
                                "firebrick"),
                     name = "Local") +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Hatched") +
  theme_classic() +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'
# `geom_smooth()` using formula 'y ~ x'




# Box plot of LT50s (Japan together)------------------------------------------------------------
lt_surv_curv %>%
  filter(!str_detect(Genotype, "X")) %>%
  mutate(region = case_when(str_detect(Genotype, "FLCK") ~ "New Genotypes",
                            str_detect(Genotype, "VT") |
                              str_detect(Genotype, "BEA") |
                              str_detect(Genotype, "RFM") ~ "North American Temperate",
                            str_detect(Genotype, "JP") ~ "New Genotypes",
                            str_detect(Genotype, "FR") ~ "New Genotypes",
                            TRUE ~ "Tropical")) %>%
  ggplot() +
  geom_boxplot(aes(x = region, 
                   y = lt_50,
                   fill = region,
                   color = region)) +
  scale_fill_manual("Locale", values = c( "New Genotypes" = "darkseagreen",
                               "North American Temperate" = "#8CC2EA",
                               "Tropical" = "#FF2E91")) +
  scale_color_manual("Locale", values = c("New Genotypes" = "grey23",
                                "North American Temperate" = "grey23",
                                "Tropical" = "grey23")) +
  labs(x = "Locale",
       y = "Embryo LT50 (°C)") +
  ylim(c(33, 37)) +
  theme_classic(base_size = 15) +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey50"),
        axis.ticks = element_line(color = "grey50"))


# ggsave("boxplot_final_pres.png",
#        height = 6,
#        width = 9)

#Box Plot LT50s (Japan Split)--------------------------------------------------
lt_surv_curv %>%
  filter(!str_detect(Genotype, "X")) %>%
  mutate(region = case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                            str_detect(Genotype, "VT") |
                              str_detect(Genotype, "BEA") |
                              str_detect(Genotype, "RFM") ~ "Temperate",
                            str_detect(Genotype, "JPOK") ~ "Okinawa, Japan",
                            str_detect(Genotype, "JPSC") ~ "Nagano, Japan", 
                            str_detect(Genotype, "FR") ~ "France",
                            TRUE ~ "Tropical")) %>%
  ggplot() +
  geom_boxplot(aes(x = region, 
                   y = lt_50,
                   fill = region,
                   color = region)) +
  scale_fill_manual("Region", values = c("Florida" = "sienna1",
                               "France" = "mediumpurple1",
                               "Okinawa, Japan" = "darkseagreen",
                               "Nagano, Japan" = "seagreen",
                               "Temperate" = "#8CC2EA",
                               "Tropical" = "#FF2D8F")) +
  scale_color_manual("Region", values = c("Florida" = "grey23",
                                "France" = "grey23",
                                "Okinawa, Japan" = "grey23",
                                "Nagano, Japan" = "grey23",
                                "Temperate" = "grey23",
                                "Tropical" = "grey23")) +
  labs(x = "Region",
       y = "Embryo LT50 (°C)") +
  ylim(c(33, 37)) +
  theme_classic()



# New data----------------------------------------------------------------------
surv %>%
  filter(Genotype == "FRMO-01" | 
           Genotype == "FRMO-02" | 
           Genotype == "JPSC" | 
           Genotype == "JPOK") %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = Genotype,
             fill = Genotype)) +
  geom_smooth (method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  geom_jitter(width = 0.1,
              shape = 21,
              alpha = 0.6,
              size = 2,
              color = "grey23") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_color_manual(values = c("slateblue4",
                                "slateblue1",
                                "#92CC8F",
                                "#136F63"),
                     labels = c("France 1",
                                "France 2",
                                "Okinawa, Japan",
                                "Nagano, Japan")) +
  scale_fill_manual(values = c("slateblue4",
                               "slateblue1",
                               "#92CC8F",
                               "#136F63"),
                    labels = c("France 1",
                               "France 2",
                               "Okinawa, Japan",
                               "Nagano, Japan")) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic(base_size = 16) +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 4 rows containing missing values (geom_point).


# ggsave("fr_jp_final.png", 
#        height = 6, 
#        width = 9)

# Temperate vs new data---------------------------------------------------------
surv %>%
  mutate(region = ifelse(is.na (region),
                         case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                                   str_detect(Genotype, "FR") ~ "France",
                                   str_detect(Genotype, "JPOK") ~ "Okinawa",
                                   str_detect(Genotype, "JPSC") ~ "Nagano"),
                         str_to_sentence(region))) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "Japan", "France", "Florida"))) %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X") &
           !str_detect(region, "Tropical")) %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = region,
             linetype = region)) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_color_manual(values = c("#8CC2EA",
                                "slateblue4",
                                "slateblue4",
                                "slateblue4",
                                "slateblue4"),
                     name = "Local") +
  scale_linetype_manual(values = c(3,
                                   1,
                                   1,
                                   1)) +
  scale_x_continuous(limits = c(25, 40)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Hatched") +
  theme_classic() +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70")) +
  theme(axis.line.y = element_blank(),
        legend.position = "none") 
# `geom_smooth()` using formula 'y ~ x'


# Tropical vs new data----------------------------------------------------------
surv %>%
  mutate(region = ifelse(is.na (region),
                         case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                                   str_detect(Genotype, "FR") ~ "France",
                                   str_detect(Genotype, "JP") ~ "Japan"),
                         str_to_sentence(region))) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "Japan", "France", "Florida"))) %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X") &
           !str_detect(region, "Temperate")) %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = region,
             linetype = region)) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  scale_color_manual(values = c("#FF2E91",
                                "slateblue4",
                                "slateblue4",
                                "slateblue4"),
                     name = "Locale") +
  scale_linetype_manual(values = c(3,
                                   1,
                                   1,
                                   1),
                        name = "Locale") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic() +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 39 rows containing non-finite values (stat_smooth).


# Correct legends --------------------------------------------------------------
surv %>%
  mutate(region = ifelse(is.na (region),
                         case_when(str_detect(Genotype, "FLCK") |
                                   str_detect(Genotype, "FR") |
                                   str_detect(Genotype, "JP") ~ "New Genotypes"),
                         str_to_sentence(region))) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "New Genotypes"))) %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X") &
           !str_detect(region, "Temperate")) %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = region,
             linetype = region)) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  scale_color_manual(values = c("#FF2E91", "slateblue4"),
                     name = "Locale") +
  scale_linetype_manual(values = c(3, 1),
                        name = "Locale") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic(base_size = 16) +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 39 rows containing non-finite values (stat_smooth).



# ggsave("new_data_vs_tropical_final.png", 
#        height = 6, 
#        width = 9)

# New and Temperate ------------------------------------------------------------
surv %>%
  mutate(region = ifelse(is.na (region),
                         case_when(str_detect(Genotype, "FLCK") |
                                     str_detect(Genotype, "FR") |
                                     str_detect(Genotype, "JP") ~ "New Genotypes"),
                         str_to_sentence(region))) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "New Genotypes"))) %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X") &
           !str_detect(region, "Tropical")) %>%
  ggplot(aes(x = Temperature,
             y = Hatched_48/Eggs,
             color = region,
             linetype = region)) +
  geom_smooth (aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  scale_color_manual(values = c("#8CC2EA", "slateblue4"),
                     name = "Locale") +
  scale_linetype_manual(values = c(3, 1),
                        name = "Locale") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic(base_size = 16) +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `geom_smooth()` using formula 'y ~ x'


# ggsave("new_data_vs_temperate_final.png", 
#        height = 6, 
#        width = 9)


# Normalized to 25°C survival (tropical) --------------------------------------
surv %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X")) %>%
  group_by(Genotype, Temperature) %>%
  summarise(Prop_Hatched = sum(Hatched_48)/sum(Eggs)) %>%
  mutate(Prop_Hatched_norm = Prop_Hatched/Prop_Hatched[Temperature == min(Temperature)]) %>%
  mutate(region = case_when(str_detect(Genotype, "FLCK") |
                                     str_detect(Genotype, "FR") |
                                     str_detect(Genotype, "JP") ~ "New Genotypes",
                                   str_detect(Genotype, "FLCK") |
                                     str_detect(Genotype, "MU") | 
                                     str_detect(Genotype, "CH") |
                                     str_detect(Genotype, "GH") |
                                     str_detect(Genotype, "SK") |
                                     str_detect(Genotype, "GU") ~ "Tropical",
                                   str_detect(Genotype, "VTECK_") |
                                     str_detect(Genotype, "BEA") | 
                                     str_detect(Genotype, "RFM") ~ "Temperate")) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "New Genotypes"))) %>%
  filter(region != "Temperate") %>%
  ggplot(aes(x = Temperature,
             y = Prop_Hatched_norm,
             color = region)) +
  geom_smooth(aes(group = Genotype),
               method = drm,
               method.args = list(fct = LL.3()),
               se = FALSE) +
  scale_color_manual(values = c("#FF2E91", "slateblue4"),
                     name = "Locale") +
  scale_linetype_manual(values = c(3, 1),
                        name = "Locale") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic() +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `summarise()` has grouped output by 'Genotype'. You can override using the
# `.groups` argument.
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 18 rows containing non-finite values (stat_smooth).
# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced



# Normalized to 25°C survival (temperate) --------------------------------------
surv %>%
  filter(!str_detect(Genotype, "Canton") & 
           !str_detect(Genotype, "X")) %>%
  group_by(Genotype, Temperature) %>%
  summarise(Prop_Hatched = sum(Hatched_48)/sum(Eggs)) %>%
  mutate(Prop_Hatched_norm = Prop_Hatched/Prop_Hatched[Temperature == min(Temperature)]) %>%
  mutate(region = case_when(str_detect(Genotype, "VTECK_") |
                              str_detect(Genotype, "BEA") | 
                              str_detect(Genotype, "RFM") ~ "Temperate",
                            str_detect(Genotype, "FLCK") |
                              str_detect(Genotype, "FR") |
                              str_detect(Genotype, "JP") ~ "New Genotypes",
                            str_detect(Genotype, "FLCK") |
                              str_detect(Genotype, "MU") | 
                              str_detect(Genotype, "CH") |
                              str_detect(Genotype, "GH") |
                              str_detect(Genotype, "SK") |
                              str_detect(Genotype, "GU") ~ "Tropical")) %>%
  mutate(region = factor(region, levels = c("Temperate", "Tropical", 
                                            "New Genotypes"))) %>%
  filter(region != "Tropical") %>%
  ggplot(aes(x = Temperature,
             y = Prop_Hatched_norm,
             color = region)) +
  geom_smooth(aes(group = Genotype),
              method = drm,
              method.args = list(fct = LL.3()),
              se = FALSE) +
  scale_color_manual(values = c("#FF2E91", "slateblue4"),
                     name = "Locale") +
  scale_linetype_manual(values = c(3, 1),
                        name = "Locale") +
  geom_hline (yintercept = 0.50,
              color = "grey50",
              linetype = 2) +
  scale_x_continuous(limits = c(25, 40),
                     breaks = seq(from = 26, to = 40, by = 2)) +
  scale_y_continuous(limits = c(0, 1.01),
                     breaks = seq(from = 0, to = 1, by = 0.25)) +
  labs(x = "Heat Shock Temperature (°C)",
       y = "Proportion Survived") +
  theme_classic() +
  theme(panel.grid.major.y = element_line(color = "grey85",
                                          linetype = 3),
        axis.line = element_line(color = "grey70"),
        axis.ticks = element_line(color = "grey70"))
# `summarise()` has grouped output by 'Genotype'. You can override using the
# `.groups` argument.
# `geom_smooth()` using formula 'y ~ x'
# Warning: Removed 15 rows containing non-finite values (stat_smooth).
# NaNs produced
# NaNs produced
# NaNs produced
# NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning in sqrt(resVar): NaNs produced
# Warning in sqrt(diag(varMat)): NaNs produced
# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced

# Warning in pt(x, df.residual(object)): NaNs produced



# ------------------------------------------------------------------------------
# Stat time
lts_df <- lt_surv_curv  %>%
  filter(!str_detect(Genotype, "X")) %>%
  mutate(locale = case_when(str_detect(Genotype, "FLCK") ~ "Florida",
                            str_detect(Genotype, "VT") |
                              str_detect(Genotype, "BEA") |
                              str_detect(Genotype, "RFM") ~ "Temperate",
                            str_detect(Genotype, "JP") ~ "Japan",
                            str_detect(Genotype, "FR") ~ "France",
                            TRUE ~ "Tropical"))

# ANOVA regular style
mod_aov <- aov(lt_50 ~ locale, data = lts_df)
summary(mod_aov)
#             Df Sum Sq Mean Sq F value   Pr(>F)    
# locale       4  5.633  1.4083   8.044 0.000172 ***
# Residuals   29  5.077  0.1751                     
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(mod_aov)
#   Tukey multiple comparisons of means
#     95% family-wise confidence level
# 
# Fit: aov(formula = lt_50 ~ locale, data = lts_df)
# 
# $locale
#                           diff         lwr       upr     p adj
# France-Florida      0.13823093 -0.85483551 1.1312974 0.9940442
# Japan-Florida      -0.47281643 -1.46588288 0.5202500 0.6423868
# Temperate-Florida  -0.02686905 -0.59643172 0.5426936 0.9999152
# Tropical-Florida    1.06164915  0.32517136 1.7981269 0.0020592
# Japan-France       -0.61104736 -1.82730040 0.6052057 0.5952278
# Temperate-France   -0.16509998 -1.06925269 0.7390527 0.9834059
# Tropical-France     0.92341822 -0.09417207 1.9410085 0.0894335
# Temperate-Japan     0.44594738 -0.45820532 1.3501001 0.6117448
# Tropical-Japan      1.53446558  0.51687529 2.5520559 0.0012293
# Tropical-Temperate  1.08851820  0.47719940 1.6998370 0.0001424

# Combining all new stuff to the same locale
lts_df_new <- lt_surv_curv  %>%
  filter(!str_detect(Genotype, "X")) %>%
  mutate(locale = case_when(str_detect(Genotype, "FLCK") ~ "New Genotype",
                            str_detect(Genotype, "VT") |
                              str_detect(Genotype, "BEA") |
                              str_detect(Genotype, "RFM") ~ "Temperate",
                            str_detect(Genotype, "JP") ~ "New Genotype",
                            str_detect(Genotype, "FR") ~ "New Genotype",
                            TRUE ~ "Tropical"))

# ANOVA regular style
mod_aov_new <- aov(lt_50 ~ locale, data = lts_df_new)
summary(mod_aov_new)
#             Df Sum Sq Mean Sq F value   Pr(>F)    
# locale       2  5.193   2.596   14.59 3.43e-05 ***
# Residuals   31  5.518   0.178                     
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(mod_aov_new)
#   Tukey multiple comparisons of means
#     95% family-wise confidence level
# 
# Fit: aov(formula = lt_50 ~ locale, data = lts_df_new)
# 
# $locale
#                              diff        lwr       upr     p adj
# Temperate-New Genotype 0.04004805 -0.3656105 0.4457066 0.9680119
# Tropical-New Genotype  1.12856625  0.5598455 1.6972870 0.0000869
# Tropical-Temperate     1.08851820  0.5666243 1.6104121 0.0000427

# Kruskal-Wallis -- 
mod_aov_ks <- kruskal.test(lt_50 ~ locale, data = lts_df)
mod_aov_ks
# 
#   Kruskal-Wallis rank sum test
# 
# data:  lt_50 by locale
# Kruskal-Wallis chi-squared = 14.732, df = 4, p-value = 0.005292

pairwise.wilcox.test(x = lts_df$lt_50, g = lts_df$locale)
# 
#   Pairwise comparisons using Wilcoxon rank sum exact test 
# 
# data:  lts_df$lt_50 and lts_df$locale 
# 
#           Florida France  Japan   Temperate
# France    1.00000 -       -       -        
# Japan     1.00000 1.00000 -       -        
# Temperate 1.00000 1.00000 0.68571 -        
# Tropical  0.03896 0.68571 0.68571 0.00047  
# 
# P value adjustment method: holm

10.2 Catherine & Caleb

# ------------------------------------------------------------------------------
# Catherine & Caleb data
# July 19, 2022
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)

# Load data
df <- read_csv("student_data/changes_in_AA.csv")
# Rows: 4 Columns: 3
# ── Column specification ────────────────────────────────────────────────────────
# Delimiter: ","
# chr (2): Adult_Acclimation_State, Direction
# dbl (1): value
# 
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Plot data

# Minimal plot
ggplot(data = df,
       aes(y = value,
           x = Adult_Acclimation_State,
           fill = Direction)) +
  ylab("Number of differentially expressed genes") +
  geom_bar(stat = "identity")


# With the position dodge
ggplot(data = df,
       aes(y = value,
           x = Adult_Acclimation_State,
           fill = Direction)) +
  ylab("Number of differentially\nexpressed genes") +
  geom_bar(stat = "identity",
           position = "dodge") +
  theme_classic(base_size = 16)


# Those underscores kinda stink
ggplot(data = df,
       aes(y = value,
           x = Adult_Acclimation_State,
           fill = Direction)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_x_discrete(labels = c("18°C", "30°C")) +
  ylab("Number of differentially\nexpressed genes") +
  xlab("Adult Acclimation State") +
  theme_classic(base_size = 16)



# Okay lets try the thing where the directions depend on the directions
df %>%
  mutate(value = ifelse(Direction == "Down", -value, value)) %>%
  ggplot(aes(x = value,
             y = Adult_Acclimation_State,
             fill = Direction)) +
  geom_bar(stat = "identity",
           position = "identity",
           width = 0.5) +
  geom_vline(xintercept = 0) +
  geom_text(aes(label = abs(value),
                hjust = 1)) +
  scale_y_discrete(labels = c("18°C", "30°C"),
                   name = "Adult Acclimation State") +
  scale_x_continuous(position = "top",
                     name = "Number of differentially expressed genes",
                     limits = c(-500, 500),
                     breaks = seq(from = -400, to = 400, by = 200),
                     labels = abs(seq(from = -400, to = 400, by = 200))) +
  scale_fill_manual(values = c("#F58161", "#B3D89C")) +
  theme_classic(base_size = 16) +
  theme(axis.line.y = element_blank(),
        legend.position = "none",
        panel.grid.major.x = element_line(color = "grey95"),
        axis.title.x = element_blank())


# ggsave("cath_cal_fig.pdf", height = 4, width = 7)
# ------------------------------------------------------------------------------
# Catherine & Caleb data
# July 19, 2022
# TS O'Leary
# ------------------------------------------------------------------------------

# Load data
df <- read_csv("student_data/changes_in_HS_CS_at_AA.csv")
# Rows: 12 Columns: 4
# ── Column specification ────────────────────────────────────────────────────────
# Delimiter: ","
# chr (2): Shock, Direction
# dbl (2): Acclimation_Temp, value
# 
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Make plot
df %>%
  mutate(Acclimation_Temp = factor(Acclimation_Temp)) %>%
  ggplot(aes(x = Acclimation_Temp,
             y = value,
             fill = Direction)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  theme_bw() +
  facet_wrap(~ Shock)   


# Make plot
df %>%
  mutate(Acclimation_Temp = factor(paste0(Acclimation_Temp, "°C"),
                                   levels = c("30°C", "25°C", "18°C"))) %>%
  mutate(DEGs = ifelse(Direction == "Up", value, -value)) %>%
  ggplot(aes(y = Acclimation_Temp,
             x = DEGs,
             fill = Direction)) +
  geom_bar(stat = "identity",
           position = "identity",
           color = "grey50") +
  scale_fill_manual(values = c("#a2d2ff", "#ffafcc")) +
  theme_classic() +
  scale_x_continuous(breaks = seq(from = -1500, to = 1500, by = 500),
                     label = abs(seq(from = -1500, to = 1500, by = 500)),
                     name = "Number of differentially expressed genes") + 
  ylab("Acclimation Temperature") +
  theme(legend.position = "none",
        panel.grid.major.x = element_line(color = "grey70", 
                                        size = 0.5),
        panel.grid.minor.x = element_line(color = "grey90", 
                                        size = 0.5)) +
  facet_wrap(~ Shock,
             nrow = 2)


# Change shock name
df %>%
  mutate(Shock = ifelse(Shock == "CSvsNS", 
                        "Acute Cold Shock", 
                        "Acute Heat Shock")) %>%
  mutate(Acclimation_Temp = factor(paste0(Acclimation_Temp, "°C"),
                                   levels = c("30°C", "25°C", "18°C"))) %>%
  mutate(DEGs = ifelse(Direction == "Up", value, -value)) %>%
  ggplot(aes(y = Acclimation_Temp,
             x = DEGs,
             fill = Direction)) +
  geom_bar(stat = "identity",
           position = "identity",
           color = "grey50") +
  scale_fill_manual(values = c("#a2d2ff", "#ffafcc")) +
  theme_classic() +
  scale_x_continuous(breaks = seq(from = -1500, to = 1500, by = 500),
                     label = abs(seq(from = -1500, to = 1500, by = 500)),
                     name = "Number of differentially expressed genes") + 
  ylab("Acclimation Temperature") +
  theme(legend.position = "none",
        panel.grid.major.x = element_line(color = "grey70", 
                                        size = 0.5),
        panel.grid.minor.x = element_line(color = "grey90", 
                                        size = 0.5)) +
  facet_wrap(~ Shock,
             nrow = 2)


# Dodge by shock
df %>%
  mutate(Shock = ifelse(Shock == "CSvsNS", 
                        "Acute Cold Shock", 
                        "Acute Heat Shock")) %>%
  mutate(Acclimation_Temp = factor(paste0(Acclimation_Temp, "°C"),
                                   levels = c("30°C", "25°C", "18°C"))) %>%
  mutate(DEGs = ifelse(Direction == "Up", value, -value)) %>%
  ggplot(aes(y = Acclimation_Temp,
             x = DEGs,
             fill = Direction,
             pattern = Shock)) +
  ggpattern::geom_bar_pattern(aes(group = Shock), pattern_color = NA,
                              color = "grey50",
                              width = 0.65,
                              stat = "identity",
                              position = position_dodge(width = 0.7),
                    pattern_fill = "red",
                    pattern_angle = 45,
                    pattern_density = 0.5,
                    pattern_spacing = 0.025,
                    pattern_key_scale_factor = 1) +
  geom_vline(xintercept = 0, color = "grey20") +
  scale_fill_manual(values = c("#a2d2ff", "#ffafcc")) +
  theme_classic(base_size = 12) +
  scale_x_continuous(breaks = seq(from = -1500, to = 1500, by = 500),
                     label = abs(seq(from = -1500, to = 1500, by = 500)),
                     name = "Number of differentially expressed genes",
                     position = "top") + 
  ggpattern::scale_pattern_manual(values = c(`Acute Cold Shock` = "none", 
                                             `Acute Heat Shock` = "circle")) +
  ylab("Acclimation Temperature") +
  theme(legend.position = "none",
        axis.line.y = element_blank(),
        panel.grid.major.x = element_line(color = "grey70", 
                                        size = 0.5),
        panel.grid.minor.x = element_line(color = "grey90", 
                                        size = 0.5)) 




df %>%
  mutate(Shock = ifelse(Shock == "CSvsNS", 
                        "Acute Cold Shock", 
                        "Acute Heat Shock")) %>%
  mutate(Acclimation_Temp = factor(paste0(Acclimation_Temp, "°C"),
                                   levels = c("30°C", "25°C", "18°C"))) %>%
  mutate(DEGs = ifelse(Direction == "Up", value, -value)) %>%
  ggplot(aes(y = Acclimation_Temp,
             x = DEGs,
             fill = Direction,
             pattern = Shock)) +
  annotate("rect",
           fill = "grey95",
           xmin = -Inf, xmax = Inf,
           ymin = 0.6, 
           ymax = 1.4,
           alpha = 0.5) +
  annotate("rect",
           fill = "grey95",
           xmin = -Inf, xmax = Inf,
           ymin = 1.6, 
           ymax = 2.4,
           alpha = 0.5) +
    annotate("rect",
           fill = "grey95",
           xmin = -Inf, xmax = Inf,
           ymin = 2.6, 
           ymax = 3.4,
           alpha = 0.5) +
  ggpattern::geom_bar_pattern(aes(group = Shock), pattern_color = NA,
                              color = "grey50",
                              width = 0.65,
                              stat = "identity",
                              position = position_dodge(width = 0.7),
                    pattern_fill = "firebrick",
                    pattern_angle = 45,
                    pattern_density = 0.3,
                    pattern_spacing = 0.025,
                    pattern_key_scale_factor = 1) +
  geom_vline(xintercept = 0, color = "grey20") +
  scale_fill_manual(values = c("#F58161", "#B3D89C")) +
  theme_classic(base_size = 12) +
  scale_x_continuous(breaks = seq(from = -1500, to = 1500, by = 500),
                     label = abs(seq(from = -1500, to = 1500, by = 500)),
                     name = "Number of differentially expressed genes",
                     position = "top") + 
  ggpattern::scale_pattern_manual(values = c(`Acute Cold Shock` = "none", 
                                             `Acute Heat Shock` = "circle")) +
  ylab("Acclimation Temperature") +
  scale_y_discrete(drop = FALSE) +
  theme(legend.position = "none",
        axis.line.y = element_blank(),
        axis.line.x = element_line(color = "grey50"),
        axis.ticks = element_line(color = "grey50"),
        panel.grid.major.x = element_line(color = "grey70", 
                                        size = 0.5),
        panel.grid.minor.x = element_line(color = "grey90", 
                                        size = 0.5)) 

10.3 Nivea & Sara

# Load packages
require(tidyverse)
require(ggrepel)
# Loading required package: ggrepel

# Load data
df <- read_csv("student_data/send_to_thomas_rRNA.csv")
# Rows: 12545 Columns: 6
# ── Column specification ────────────────────────────────────────────────────────
# Delimiter: ","
# dbl (6): baseMean, log2FoldChange, lfcSE, stat, pvalue, padj
# 
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Make plot
df %>%
  filter(!is.na(padj)) %>%
  ggplot(mapping = aes(x = log2FoldChange, 
                     y = -log10(padj))) +
  geom_point(mapping = aes(fill = padj < 0.05 & abs(log2FoldChange) > 2),
             color = "grey50",
             shape = 21,
             alpha = 0.8) +
  geom_hline(yintercept = -log10(0.05), 
             linetype = "dashed", 
             color = "grey50") +
  geom_vline(xintercept = 0, 
             color = "grey20") +
  geom_vline(xintercept = -2, 
             linetype = "dashed",
             color = "grey50") +
  geom_vline(xintercept = 2, 
             linetype = "dashed",
             color = "grey50") +
  scale_fill_manual(values = c("grey50", "red")) +
  scale_size_manual(values = c(1, 2)) +
  xlab(expression(paste(log[2], "(fold-change)"))) +
  ylab(expression(paste(-log[10], "(p-value)"))) +
  scale_x_continuous(limits = c(-12, 12)) +
  ylim(c(0, 140)) +
  theme_minimal() +
  theme(legend.position = "none")

# ------------------------------------------------------------------------------
# Nivea & Sara -- rRNA depl data -- figs and stuff
# July 21, 2022
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)
require(DESeq2)
# Loading required package: DESeq2
# Loading required package: S4Vectors
# Loading required package: stats4
# Loading required package: BiocGenerics
# 
# Attaching package: 'BiocGenerics'
# The following objects are masked from 'package:dplyr':
# 
#     combine, intersect, setdiff, union
# The following objects are masked from 'package:stats':
# 
#     IQR, mad, sd, var, xtabs
# The following objects are masked from 'package:base':
# 
#     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#     union, unique, unsplit, which.max, which.min
# 
# Attaching package: 'S4Vectors'
# The following objects are masked from 'package:dplyr':
# 
#     first, rename
# The following object is masked from 'package:tidyr':
# 
#     expand
# The following objects are masked from 'package:base':
# 
#     expand.grid, I, unname
# Loading required package: IRanges
# 
# Attaching package: 'IRanges'
# The following objects are masked from 'package:dplyr':
# 
#     collapse, desc, slice
# The following object is masked from 'package:purrr':
# 
#     reduce
# Loading required package: GenomicRanges
# Loading required package: GenomeInfoDb
# Loading required package: SummarizedExperiment
# Loading required package: MatrixGenerics
# Loading required package: matrixStats
# 
# Attaching package: 'matrixStats'
# The following object is masked from 'package:dplyr':
# 
#     count
# 
# Attaching package: 'MatrixGenerics'
# The following objects are masked from 'package:matrixStats':
# 
#     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#     colWeightedMeans, colWeightedMedians, colWeightedSds,
#     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#     rowWeightedSds, rowWeightedVars
# Loading required package: Biobase
# Welcome to Bioconductor
# 
#     Vignettes contain introductory material; view with
#     'browseVignettes()'. To cite Bioconductor, see
#     'citation("Biobase")', and for packages 'citation("pkgname")'.
# 
# Attaching package: 'Biobase'
# The following object is masked from 'package:MatrixGenerics':
# 
#     rowMedians
# The following objects are masked from 'package:matrixStats':
# 
#     anyMissing, rowMedians
require(ggrepel)

# Load data
dds <- readRDS(here::here("student_data/reu_workshop_nivea_dds_rRNA.rds"))

# Load specific result comparisons
res_37 <- results(dds,
                  name = "condition_Hot37_vs_Control",
                  alpha = 0.05)
res_34 <- results(dds,
                  name = "condition_Hot34_vs_Control",
                  alpha = 0.05)
res_10 <- results(dds,
                  name = "condition_Cold10_vs_Control",
                  alpha = 0.05)
res_04 <- results(dds,
                  name = "condition_Cold4_vs_Control",
                  alpha = 0.05)

# Take a peak at the summary values
summary(res_37)
# 
# out of 11815 with nonzero total read count
# adjusted p-value < 0.05
# LFC > 0 (up)       : 491, 4.2%
# LFC < 0 (down)     : 205, 1.7%
# outliers [1]       : 1, 0.0085%
# low counts [2]     : 917, 7.8%
# (mean count < 1)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results
summary(res_34)
# 
# out of 11815 with nonzero total read count
# adjusted p-value < 0.05
# LFC > 0 (up)       : 421, 3.6%
# LFC < 0 (down)     : 115, 0.97%
# outliers [1]       : 1, 0.0085%
# low counts [2]     : 1604, 14%
# (mean count < 3)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results
summary(res_10)
# 
# out of 11815 with nonzero total read count
# adjusted p-value < 0.05
# LFC > 0 (up)       : 338, 2.9%
# LFC < 0 (down)     : 699, 5.9%
# outliers [1]       : 1, 0.0085%
# low counts [2]     : 2519, 21%
# (mean count < 9)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results
summary(res_04)
# 
# out of 11815 with nonzero total read count
# adjusted p-value < 0.05
# LFC > 0 (up)       : 4, 0.034%
# LFC < 0 (down)     : 2, 0.017%
# outliers [1]       : 1, 0.0085%
# low counts [2]     : 0, 0%
# (mean count < 1)
# [1] see 'cooksCutoff' argument of ?results
# [2] see 'independentFiltering' argument of ?results

# Convert to a tibble because TSO likes that format better
res_37 <- res_37 %>%
  as_tibble(rownames = "FBgn")
res_34 <- res_34 %>%
  as_tibble(rownames = "FBgn")
res_10 <- res_10 %>%
  as_tibble(rownames = "FBgn")
res_04 <- res_04 %>%
  as_tibble(rownames = "FBgn")

# Combine all these results together
res <- bind_rows(list("res_37" = res_37,
                      "res_34" = res_34,
                      "res_10" = res_10,
                      "res_04" = res_04),
                 .id = "temp")

# Let's count 'em up
res %>%
  filter(padj < 0.05) %>%
  group_by(temp, log2FoldChange > 0) %>%
  tally() %>%
  dplyr::rename(direction = `log2FoldChange > 0`) %>%
  mutate(direction = ifelse(direction, "Up", "Down"))
# # A tibble: 8 × 3
# # Groups:   temp [4]
#   temp   direction     n
#   <chr>  <chr>     <int>
# 1 res_04 Down          2
# 2 res_04 Up            4
# 3 res_10 Down        699
# 4 res_10 Up          338
# 5 res_34 Down        115
# 6 res_34 Up          421
# 7 res_37 Down        205
# 8 res_37 Up          491

# Volcano Plots --
# Cold only
res %>%
  filter(temp == "res_10" | temp == "res_04") %>%
  mutate(temp = case_when(temp == "res_04" ~ "4°C vs 25°C",
                          temp == "res_10" ~ "10°C vs 25°C",
                          temp == "res_34" ~ "34°C vs 25°C",
                          temp == "res_37" ~ "37°C vs 25°C")) %>%
  mutate(temp = factor(temp, levels = c("4°C vs 25°C",
                                        "10°C vs 25°C",
                                        "34°C vs 25°C",
                                        "37°C vs 25°C"))) %>%
  filter(!is.na(padj)) %>%
  ggplot() +
  geom_point(aes(x = log2FoldChange,
                 y = -log10(padj),
                 color = padj < 0.05)) +
  geom_vline(xintercept = 0,
             color = "grey70") +
  geom_hline(yintercept = -log10(0.05),
             color = "grey70") +
  scale_color_manual(values = c("grey70",
                                "firebrick",
                                "grey80"),
                     name = "Differentially expressed genes",
                     label = c("Not signigicant", "Significant")) +
  xlim(c(-14,14)) +
  theme_classic() +
  theme(legend.position = "top") +
  facet_wrap(~ temp)


# Hot only
res %>%
  filter(temp == "res_34" | temp == "res_37") %>%
  mutate(temp = case_when(temp == "res_04" ~ "4°C vs 25°C",
                          temp == "res_10" ~ "10°C vs 25°C",
                          temp == "res_34" ~ "34°C vs 25°C",
                          temp == "res_37" ~ "37°C vs 25°C")) %>%
  mutate(temp = factor(temp, levels = c("4°C vs 25°C",
                                        "10°C vs 25°C",
                                        "34°C vs 25°C",
                                        "37°C vs 25°C"))) %>%
  filter(!is.na(padj)) %>%
  ggplot() +
  geom_point(aes(x = log2FoldChange,
                 y = -log10(padj),
                 color = padj < 0.05)) +
  geom_vline(xintercept = 0,
             color = "grey70") +
  geom_hline(yintercept = -log10(0.05),
             color = "grey70") +
  scale_color_manual(values = c("grey70",
                                "firebrick",
                                "grey80"),
                     name = "Differentially expressed genes",
                     label = c("Not signigicant", "Significant")) +
  xlim(c(-14,14)) +
  theme_classic() +
  theme(legend.position = "top") +
  facet_wrap(~ temp)


# All four comparisons
res %>%
  mutate(temp = case_when(temp == "res_04" ~ "4°C vs 25°C",
                          temp == "res_10" ~ "10°C vs 25°C",
                          temp == "res_34" ~ "34°C vs 25°C",
                          temp == "res_37" ~ "37°C vs 25°C")) %>%
  mutate(temp = factor(temp, levels = c("4°C vs 25°C",
                                        "10°C vs 25°C",
                                        "34°C vs 25°C",
                                        "37°C vs 25°C"))) %>%
  filter(!is.na(padj)) %>%
  ggplot() +
  geom_point(aes(x = log2FoldChange,
                 y = -log10(padj),
                 color = padj < 0.05)) +
  geom_vline(xintercept = 0,
             color = "grey70") +
  geom_hline(yintercept = -log10(0.05),
             color = "grey70",
             linetype = 2) +
  scale_color_manual(values = c("grey70",
                                "firebrick",
                                "grey80"),
                     name = "Differentially expressed genes",
                     label = c("Not signigicant", "Significant")) +
  xlim(c(-14,14)) +
  theme_classic() +
  theme(legend.position = "top") +
  facet_wrap(~ temp)




# Let's convert to gene symbol from FBgn
library(AnnotationDbi)
# 
# Attaching package: 'AnnotationDbi'
# The following object is masked from 'package:MASS':
# 
#     select
# The following object is masked from 'package:dplyr':
# 
#     select
library(org.Dm.eg.db)
# 

res$symbol <- mapIds(org.Dm.eg.db,
                     keys = res$FBgn,
                     column = "SYMBOL",
                     keytype = "FLYBASE",
                     multiVals = "first")
# 'select()' returned 1:1 mapping between keys and columns


# Let's try and add symbols to the volcano plot
res %>%
  filter(temp == "res_37") %>%
  filter(!is.na(padj)) %>%
  ggplot(aes(x = log2FoldChange,
             y = -log10(padj))) +
  geom_point(aes(color = padj < 0.05)) +
  geom_vline(xintercept = 0,
             color = "grey70") +
  geom_hline(yintercept = -log10(0.05),
             color = "grey70") +
  geom_text_repel(data = res %>%
                    filter(temp == "res_37") %>%
                    arrange(padj) %>%
                    head(10),
                  aes(label = symbol)) +
  scale_color_manual(values = c("grey70",
                                "firebrick",
                                "grey80"),
                     name = "Differentially expressed genes",
                     label = c("Not signigicant", "Significant")) +
  xlim(c(-14,14)) +
  theme_classic() +
  theme(legend.position = "top")