library(tidyverse)
library(kableExtra)
library(knitr)
dir.create("outputs/107", showWarnings = FALSE, recursive = TRUE)
NT note: the rmd used to produce rosa_nodups is called “Thompson-etal-RoSA-amplicon-data.rmd” and is in the paper_code folder on NT’s laptop.
rosa_nodups <- read_rds("./data/107-RoSA-popgen-survey-data.rds")
First let’s look at how many samples have recombinant genotypes.
rosa_nodups %>%
filter(!str_detect(rosa, "\\?")) %>%
mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
group_by(is_recomb) %>%
summarise(n_type = n()) %>%
kable("html") %>%
kable_styling("striped", full_width = FALSE)
is_recomb | n_type |
---|---|
FALSE | 2556 |
TRUE | 31 |
n_recomb <- rosa_nodups %>%
filter(!str_detect(rosa, "\\?")) %>%
mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
group_by(is_recomb) %>%
summarise(n_type = n()) %>%
.[2, "n_type"]
n_nonrecomb <- rosa_nodups %>%
filter(!str_detect(rosa, "\\?")) %>%
mutate(is_recomb = !rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
group_by(is_recomb) %>%
summarise(n_type = n()) %>%
.[1, "n_type"]
There are
frequency of recombinant genotypes.
Create a frequency table of non-recombinant genotypes in the populations we want for the paper.
paper_pops <- c("Siletz River spring", "Siletz River fall", "Iron Gate hatchery fall", "Salmon River spring", "Salmon River fall", "Salmon River unknown", "Trinity River hatchery spring", "Trinity River hatchery fall", "Trinity River unknown", "Eel River fall", "Russian River fall", "Sacramento River winter", "Coleman hatchery late-fall", "Butte Creek spring", "Butte Creek fall", "Butte Creek unknown", "Mill-Deer Creek", "Feather River hatchery spring", "Feather River hatchery fall", "Feather River unknown")
freq_table <- rosa_nodups %>%
filter(rosa %in% c("HHHHHHHH", "LLLLLLLL", "EEEEEEEE")) %>%
mutate(rosa = factor(rosa, levels = c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL"))) %>%
group_by(pop_name) %>%
mutate(n_pop = n()) %>%
group_by(pop_name, rosa) %>%
mutate(
n_rosa = n(),
freq_rosa = round(n_rosa / n_pop, 3)
) %>%
ungroup() %>%
dplyr::select(pop_name, n_pop, rosa, freq_rosa) %>%
distinct(pop_name, rosa, .keep_all = TRUE) %>%
spread(rosa, freq_rosa) %>%
arrange(factor(pop_name, levels = paper_pops)) %>%
dplyr::select(pop_name, contains("EE"), contains("HH"), contains("LL"), n_pop) %>%
rename(
Collection = pop_name,
EE = EEEEEEEE,
EL = HHHHHHHH,
LL = LLLLLLLL,
n = n_pop
) %>%
mutate(
Ecotype = str_extract(Collection, "([spring]{6}|[fall]{4}|[unknown]{7}|[winter]{6}|[late]{4}[-]{1}[fall]{4})"),
Ecotype = gsub("unknown", "mix", Ecotype),
Ecotype = gsub("spring", "spring-run", Ecotype),
Ecotype = gsub("fall", "fall-run", Ecotype),
Ecotype = gsub("winter", "winter-run", Ecotype),
Ecotype = gsub("late-fall", "late fall-run", Ecotype),
Ecotype = ifelse(Ecotype == "late fall-run-run", "late fall-run", Ecotype)
) %>%
dplyr::select(Collection, Ecotype, everything()) %>%
replace_na(list(Ecotype = "mix")) %>%
mutate(Collection = recode(
Collection,
`Coleman hatchery late-fall` = "Battle Creek late-fall",
`Iron Gate hatchery fall` = "Klamath River fall"
))
freq_table %>%
replace_na(list(EE = "", EL = "", LL = ""))
## write out frequency table
freq_table %>%
replace_na(list(EE = "0.000", EL = "0.000", LL = "0.000")) %>%
write_csv("outputs/107/RoSA_frequencies_tableS7.csv")
Create a table of all RoSA genotypes including recombinants.
rosa_hap_order <- rosa_nodups %>%
filter(!str_detect(rosa, "\\?")) %>%
distinct(rosa) %>%
mutate(
n_e = str_count(rosa, "E"),
n_h = str_count(rosa, "H"),
n_l = str_count(rosa, "L")
) %>%
arrange(desc(n_e), desc(n_h), desc(n_l)) %>%
pull(rosa)
count_table <- rosa_nodups %>%
filter(!str_detect(rosa, "\\?")) %>%
mutate(rosa_f = factor(rosa, levels = rosa_hap_order)) %>%
group_by(pop_name) %>%
mutate(n_pop = n()) %>%
group_by(pop_name, rosa) %>%
mutate(n_rosa = n()) %>%
ungroup() %>%
dplyr::select(pop_name, n_pop, rosa_f, n_rosa) %>%
distinct(pop_name, rosa_f, .keep_all = TRUE) %>%
spread(rosa_f, n_rosa) %>%
arrange(factor(pop_name, levels = paper_pops)) %>%
dplyr::select(pop_name, rosa_hap_order, n_pop) %>%
rename(
Collection = pop_name,
n = n_pop
) %>%
mutate(Collection = recode(
Collection,
`Coleman hatchery late-fall` = "Battle Creek late-fall",
`Iron Gate hatchery fall` = "Klamath River fall"
))
# new versions of tidyr don't allow type conversions for NA replacements, so
# deal with it this way
count_table_char <- count_table %>%
mutate_all(.funs = as.character)
count_table_char[is.na(count_table_char)] <- ""
## write out count table here.
write_csv(count_table_char, "outputs/107/RoSA_counts_all_genotypes_dataS3.csv")
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Sierra 10.12.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Denver
## date 2020-05-14
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
## backports 1.1.6 2020-04-05 [1] CRAN (R 3.6.2)
## broom 0.5.6 2020-04-20 [1] CRAN (R 3.6.2)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
## cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 3.6.0)
## dbplyr 1.4.3 2020-04-19 [1] CRAN (R 3.6.2)
## digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0)
## dplyr * 0.8.5 2020-03-07 [1] CRAN (R 3.6.0)
## ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0)
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 3.6.0)
## fs 1.4.1 2020-04-04 [1] CRAN (R 3.6.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0)
## ggplot2 * 3.3.0 2020-03-05 [1] CRAN (R 3.6.0)
## glue 1.4.0 2020-04-03 [1] CRAN (R 3.6.2)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
## haven 2.2.0 2019-11-08 [1] CRAN (R 3.6.0)
## highr 0.8 2019-03-20 [1] CRAN (R 3.6.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.0)
## htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
## httr 1.4.1 2019-08-05 [1] CRAN (R 3.6.0)
## jsonlite 1.6.1 2020-02-02 [1] CRAN (R 3.6.0)
## kableExtra * 1.1.0 2019-03-16 [1] CRAN (R 3.6.0)
## knitr * 1.28 2020-02-06 [1] CRAN (R 3.6.0)
## lattice 0.20-38 2018-11-04 [2] CRAN (R 3.6.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.0)
## lubridate 1.7.8 2020-04-06 [1] CRAN (R 3.6.2)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
## modelr 0.1.6 2020-02-22 [1] CRAN (R 3.6.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
## nlme 3.1-142 2019-11-07 [2] CRAN (R 3.6.2)
## pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 3.6.2)
## R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
## Rcpp 1.0.4 2020-03-17 [1] CRAN (R 3.6.0)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 3.6.0)
## rlang 0.4.5 2020-03-01 [1] CRAN (R 3.6.0)
## rmarkdown 2.1 2020-01-20 [1] CRAN (R 3.6.0)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 3.6.0)
## rvest 0.3.5 2019-11-08 [1] CRAN (R 3.6.0)
## scales 1.1.0 2019-11-18 [1] CRAN (R 3.6.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
## tibble * 3.0.1 2020-04-20 [1] CRAN (R 3.6.2)
## tidyr * 1.0.2 2020-01-24 [1] CRAN (R 3.6.0)
## tidyselect 1.0.0 2020-01-27 [1] CRAN (R 3.6.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 3.6.0)
## vctrs 0.2.4 2020-03-10 [1] CRAN (R 3.6.0)
## viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.6.0)
## webshot 0.5.2 2019-11-22 [1] CRAN (R 3.6.0)
## withr 2.2.0 2020-04-20 [1] CRAN (R 3.6.2)
## xfun 0.13 2020-04-13 [1] CRAN (R 3.6.2)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 3.6.2)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0)
##
## [1] /Users/eriq/Library/R/3.6/library
## [2] /Library/Frameworks/R.framework/Versions/3.6/Resources/library
Running the code and rendering this notebook required approximately this much time on a Mac laptop of middling speed:
Sys.time() - start_time
## Time difference of 1.477606 secs