library(tidyverse)
library(kableExtra)
library(knitr)

dir.create("outputs/107", showWarnings = FALSE, recursive = TRUE)

NT note: the rmd used to produce rosa_nodups is called “Thompson-etal-RoSA-amplicon-data.rmd” and is in the paper_code folder on NT’s laptop.

load rosa data that has been prefiltered to remove duplicates and missing data.

rosa_nodups <- read_rds("./data/107-RoSA-popgen-survey-data.rds")

First let’s look at how many samples have recombinant genotypes.

rosa_nodups %>%
  filter(!str_detect(rosa, "\\?")) %>%
  mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
  group_by(is_recomb) %>%
  summarise(n_type = n()) %>%
  kable("html") %>%
  kable_styling("striped", full_width = FALSE)

is_recomb	n_type
FALSE	2556
TRUE	31

n_recomb <- rosa_nodups %>%
  filter(!str_detect(rosa, "\\?")) %>%
  mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
  group_by(is_recomb) %>%
  summarise(n_type = n()) %>%
  .[2, "n_type"]

n_nonrecomb <- rosa_nodups %>%
  filter(!str_detect(rosa, "\\?")) %>%
  mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
  group_by(is_recomb) %>%
  summarise(n_type = n()) %>%
  .[1, "n_type"]

There are

recombinants which makes a

frequency of recombinant genotypes.

Frequency table of non-recombinant haplotypes

Create a frequency table of non-recombinant genotypes in the populations we want for the paper.

paper_pops <- c("Siletz River spring", "Siletz River fall", "Iron Gate hatchery fall", "Salmon River spring", "Salmon River fall", "Salmon River unknown", "Trinity River hatchery spring", "Trinity River hatchery fall", "Trinity River unknown", "Eel River fall", "Russian River fall", "Sacramento River winter", "Coleman hatchery late-fall", "Butte Creek spring", "Butte Creek fall", "Butte Creek unknown", "Mill-Deer Creek", "Feather River hatchery spring", "Feather River hatchery fall", "Feather River unknown")

freq_table <- rosa_nodups %>%
  filter(rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
  mutate(rosa = factor(rosa, levels = c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL"))) %>%
  group_by(pop_name) %>%
  mutate(n_pop = n()) %>%
  group_by(pop_name, rosa) %>%
  mutate(
    n_rosa = n(),
    freq_rosa = round(n_rosa / n_pop, 3)
  ) %>%
  ungroup() %>%
  dplyr::select(pop_name, n_pop, rosa, freq_rosa) %>%
  distinct(pop_name, rosa, .keep_all = TRUE) %>%
  spread(rosa, freq_rosa) %>%
  arrange(factor(pop_name, levels = paper_pops)) %>%
  dplyr::select(pop_name, contains("EE"), contains("HH"), contains("LL"), n_pop) %>%
  rename(
    Collection = pop_name,
    EE = EEEEEEEE,
    EL = HHHHHHHH,
    LL = LLLLLLLL,
    n = n_pop
  ) %>%
  mutate(
    Ecotype = str_extract(Collection, "([spring]{6}|[fall]{4}|[unknown]{7}|[winter]{6}|[late]{4}[-]{1}[fall]{4})"),
    Ecotype = gsub("unknown", "mix", Ecotype),
    Ecotype = gsub("spring", "spring-run", Ecotype),
    Ecotype = gsub("fall", "fall-run", Ecotype),
    Ecotype = gsub("winter", "winter-run", Ecotype),
    Ecotype = gsub("late-fall", "late fall-run", Ecotype),
    Ecotype = ifelse(Ecotype == "late fall-run-run", "late fall-run", Ecotype)
  ) %>%
  dplyr::select(Collection, Ecotype, everything()) %>%
  replace_na(list(Ecotype = "mix")) %>%
  mutate(Collection = recode(
    Collection,
    `Coleman hatchery late-fall` = "Battle Creek late-fall",
    `Iron Gate hatchery fall` = "Klamath River fall"
  ))


freq_table %>%
  replace_na(list(EE = "", EL = "", LL = ""))

## write out frequency table
freq_table %>%
  replace_na(list(EE = "0.000", EL = "0.000", LL = "0.000")) %>%
  write_csv("outputs/107/RoSA_frequencies_tableS7.csv")

Frequency table including the within-RoSA recombinants

Create a table of all RoSA genotypes including recombinants.

rosa_hap_order <- rosa_nodups %>%
  filter(!str_detect(rosa, "\\?")) %>%
  distinct(rosa) %>%
  mutate(
    n_e = str_count(rosa, "E"),
    n_h = str_count(rosa, "H"),
    n_l = str_count(rosa, "L")
  ) %>%
  arrange(desc(n_e), desc(n_h), desc(n_l)) %>%
  pull(rosa)

count_table <- rosa_nodups %>%
  filter(!str_detect(rosa, "\\?")) %>%
  mutate(rosa_f = factor(rosa, levels = rosa_hap_order)) %>%
  group_by(pop_name) %>%
  mutate(n_pop = n()) %>%
  group_by(pop_name, rosa) %>%
  mutate(n_rosa = n()) %>%
  ungroup() %>%
  dplyr::select(pop_name, n_pop, rosa_f, n_rosa) %>%
  distinct(pop_name, rosa_f, .keep_all = TRUE) %>%
  spread(rosa_f, n_rosa) %>%
  arrange(factor(pop_name, levels = paper_pops)) %>%
  dplyr::select(pop_name, rosa_hap_order, n_pop) %>%
  rename(
    Collection = pop_name,
    n = n_pop
  ) %>%
  mutate(Collection = recode(
    Collection,
    `Coleman hatchery late-fall` = "Battle Creek late-fall",
    `Iron Gate hatchery fall` = "Klamath River fall"
  ))

# new versions of tidyr don't allow type conversions for NA replacements, so
# deal with it this way
count_table_char <- count_table %>%
  mutate_all(.funs = as.character)

count_table_char[is.na(count_table_char)] <- ""

## write out count table here.
write_csv(count_table_char, "outputs/107/RoSA_counts_all_genotypes_dataS3.csv")

Session Info

sessioninfo::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       macOS Sierra 10.12.6        
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Denver              
##  date     2020-05-14                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version date       lib source        
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.0)
##  backports     1.1.6   2020-04-05 [1] CRAN (R 3.6.2)
##  broom         0.5.6   2020-04-20 [1] CRAN (R 3.6.2)
##  cellranger    1.1.0   2016-07-27 [1] CRAN (R 3.6.0)
##  cli           2.0.2   2020-02-28 [1] CRAN (R 3.6.0)
##  colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.6.0)
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.0)
##  DBI           1.1.0   2019-12-15 [1] CRAN (R 3.6.0)
##  dbplyr        1.4.3   2020-04-19 [1] CRAN (R 3.6.2)
##  digest        0.6.25  2020-02-23 [1] CRAN (R 3.6.0)
##  dplyr       * 0.8.5   2020-03-07 [1] CRAN (R 3.6.0)
##  ellipsis      0.3.0   2019-09-20 [1] CRAN (R 3.6.0)
##  evaluate      0.14    2019-05-28 [1] CRAN (R 3.6.0)
##  fansi         0.4.1   2020-01-08 [1] CRAN (R 3.6.0)
##  forcats     * 0.5.0   2020-03-01 [1] CRAN (R 3.6.0)
##  fs            1.4.1   2020-04-04 [1] CRAN (R 3.6.2)
##  generics      0.0.2   2018-11-29 [1] CRAN (R 3.6.0)
##  ggplot2     * 3.3.0   2020-03-05 [1] CRAN (R 3.6.0)
##  glue          1.4.0   2020-04-03 [1] CRAN (R 3.6.2)
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 3.6.0)
##  haven         2.2.0   2019-11-08 [1] CRAN (R 3.6.0)
##  highr         0.8     2019-03-20 [1] CRAN (R 3.6.0)
##  hms           0.5.3   2020-01-08 [1] CRAN (R 3.6.0)
##  htmltools     0.4.0   2019-10-04 [1] CRAN (R 3.6.0)
##  httr          1.4.1   2019-08-05 [1] CRAN (R 3.6.0)
##  jsonlite      1.6.1   2020-02-02 [1] CRAN (R 3.6.0)
##  kableExtra  * 1.1.0   2019-03-16 [1] CRAN (R 3.6.0)
##  knitr       * 1.28    2020-02-06 [1] CRAN (R 3.6.0)
##  lattice       0.20-38 2018-11-04 [2] CRAN (R 3.6.2)
##  lifecycle     0.2.0   2020-03-06 [1] CRAN (R 3.6.0)
##  lubridate     1.7.8   2020-04-06 [1] CRAN (R 3.6.2)
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.0)
##  modelr        0.1.6   2020-02-22 [1] CRAN (R 3.6.0)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.6.0)
##  nlme          3.1-142 2019-11-07 [2] CRAN (R 3.6.2)
##  pillar        1.4.3   2019-12-20 [1] CRAN (R 3.6.0)
##  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 3.6.0)
##  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 3.6.2)
##  R6            2.4.1   2019-11-12 [1] CRAN (R 3.6.0)
##  Rcpp          1.0.4   2020-03-17 [1] CRAN (R 3.6.0)
##  readr       * 1.3.1   2018-12-21 [1] CRAN (R 3.6.0)
##  readxl        1.3.1   2019-03-13 [1] CRAN (R 3.6.0)
##  reprex        0.3.0   2019-05-16 [1] CRAN (R 3.6.0)
##  rlang         0.4.5   2020-03-01 [1] CRAN (R 3.6.0)
##  rmarkdown     2.1     2020-01-20 [1] CRAN (R 3.6.0)
##  rstudioapi    0.11    2020-02-07 [1] CRAN (R 3.6.0)
##  rvest         0.3.5   2019-11-08 [1] CRAN (R 3.6.0)
##  scales        1.1.0   2019-11-18 [1] CRAN (R 3.6.0)
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.0)
##  stringi       1.4.6   2020-02-17 [1] CRAN (R 3.6.0)
##  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 3.6.0)
##  tibble      * 3.0.1   2020-04-20 [1] CRAN (R 3.6.2)
##  tidyr       * 1.0.2   2020-01-24 [1] CRAN (R 3.6.0)
##  tidyselect    1.0.0   2020-01-27 [1] CRAN (R 3.6.0)
##  tidyverse   * 1.3.0   2019-11-21 [1] CRAN (R 3.6.0)
##  vctrs         0.2.4   2020-03-10 [1] CRAN (R 3.6.0)
##  viridisLite   0.3.0   2018-02-01 [1] CRAN (R 3.6.0)
##  webshot       0.5.2   2019-11-22 [1] CRAN (R 3.6.0)
##  withr         2.2.0   2020-04-20 [1] CRAN (R 3.6.2)
##  xfun          0.13    2020-04-13 [1] CRAN (R 3.6.2)
##  xml2          1.3.2   2020-04-23 [1] CRAN (R 3.6.2)
##  yaml          2.2.1   2020-02-01 [1] CRAN (R 3.6.0)
## 
## [1] /Users/eriq/Library/R/3.6/library
## [2] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

Running Time

Running the code and rendering this notebook required approximately this much time on a Mac laptop of middling speed:

Sys.time() - start_time

## Time difference of 1.477606 secs

107-Population-genetics-survey-rosa

Neil Thompson

Last Updated: 2020-05-14

Frequency table of non-recombinant haplotypes

Frequency table including the within-RoSA recombinants

Session Info

Running Time