library(tidyverse)
library(nlme)
Load RoSA genotype data and join it to the estuary metadata which includes Gonadosomatic Index data (Hearsey 2011).
rosa_data <- read_rds("./data/101-RoSA-klamath-estuary-samples.rds")
estuary_meta <- read_csv("./data/102-Klamath-entryDATA_withNMFSID.csv")
## Warning: Missing column names filled in: 'X16' [16]
rosa_meta <- left_join(estuary_meta, rosa_data, by = c("NMFS ID" = "NMFS_DNA_ID")) %>%
dplyr::select(-ID) %>%
rename(NMFS_DNA_ID = "NMFS ID") %>%
dplyr::select(NMFS_DNA_ID, rosa, everything())
Remove all samples with ANY missing data. And the recombinant genotypes.
rosa_stats <- rosa_meta %>%
filter(
!str_detect(rosa, "\\?"),
rosa %in% c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL")
)
There are 502 at-entry estuary samples with 100% complete genotypes. There are 515 Klamath at-entry estuary site samples.
Visualize the relationship between RoSA and gonadosomatic index.
rosa_stats %>%
mutate(rosa_f = factor(rosa, levels = c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL"))) %>%
ggplot(., aes(x = julian, y = gsi, fill = rosa_f, colour = rosa_f)) +
geom_point() +
xlab(label = "Estuary sampling date") +
ylab(label = "Gonadosomatic Index") +
scale_fill_discrete(name = "Genotype") +
scale_colour_discrete(name = "Genotype") +
theme_bw() +
scale_x_continuous(breaks = c(121, 152, 182, 213, 244, 274, 305), labels = c(
"May-01", "June-01",
"July-01", "Aug-01", "Sept-01", "Oct-01", "Nov-01"
)) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14), axis.text.x = element_text(angle = 45, hjust = 1)
) +
guides(
fill = FALSE,
colour = FALSE
) +
facet_grid(year ~ .)
rosa_stats %>%
mutate(rosa_f = factor(rosa, levels = c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL"))) %>%
ggplot(., aes(x = julian, y = gsi, fill = rosa_f, colour = rosa_f)) +
geom_point() +
xlab(label = "Estuary sampling date") +
ylab(label = "Gonadosomatic Index") +
scale_fill_discrete(name = "Genotype") +
scale_colour_discrete(name = "Genotype") +
theme_bw() +
scale_x_continuous(breaks = c(121, 152, 182, 213, 244, 274, 305), labels = c(
"May-01", "June-01",
"July-01", "Aug-01", "Sept-01", "Oct-01", "Nov-01"
)) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14), axis.text.x = element_text(angle = 45, hjust = 1), strip.background = element_blank()
) +
facet_grid(year ~ sex)
stat_data <- rosa_stats %>%
mutate(
sex_f = factor(sex, levels = c("f", "m")),
year_f = factor(year, levels = c("2009", "2010")),
rosa_f = factor(rosa, levels = c("EEEEEEEE", "HHHHHHHH", "LLLLLLLL")),
log_est = log(julian),
log_est2 = log(julian)^2,
gonadsi = round(gsi, 4)
)
plot(gonadsi ~ log_est, stat_data)
plot(gonadsi ~ log_est2, stat_data)
plot(gonadsi ~ sex_f, stat_data)
plot(gonadsi ~ year_f, stat_data)
It looks like all of the explanatory variables may have a violation of equal variance. I’ll use the different variance structures to deal with this and see if it’s preferred over a model without the additional variance structures.
It looks like all of the explanatory variables may have a violation of equal variance. I’ll use the different variance structures to deal with this and see if it’s preferred over a model without the additional variance structures.
M0 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, method = "REML", data = stat_data)
logestFixed <- varFixed(~log_est)
logest2Fixed <- varFixed(~log_est2)
logest2Power <- varPower(form = ~log_est2)
logest2ConstPower <- varConstPower(form = ~log_est2)
estFixed <- varFixed(~ log_est + log_est2)
sexFixed <- varIdent(~sex_f)
yearFixed <- varIdent(~year_f)
sexyrFixed <- varIdent(~ sex_f + year_f)
combFixed <- varComb(
varFixed(~ log_est + log_est2),
varIdent(~ sex_f + year_f)
)
M1 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logestFixed, method = "REML", data = stat_data)
M2 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logest2Fixed, method = "REML", data = stat_data)
M3 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = estFixed, method = "REML", data = stat_data)
M4 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = sexFixed, method = "REML", data = stat_data)
M5 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = yearFixed, method = "REML", data = stat_data)
M6 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = sexyrFixed, method = "REML", data = stat_data)
M7 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = combFixed, method = "REML", data = stat_data)
M8 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logest2Power, method = "REML", data = stat_data)
M9 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logest2ConstPower, method = "REML", data = stat_data)
AIC(M0, M1, M2, M3, M4, M5, M6, M7, M8, M9)
The model that uses the varPower variance structure on the log_est2 explanatory variable is the preferred model indicated by AIC. The varConstPower is nearly the same as the varPower structure, but those two are highly preferred over the other variance structures.
Model checking of the varPower model (M8)
e1 <- resid(M8, type = "normalized")
plot(e1 ~ log_est2, stat_data)
Awesome, this looks way way better than the original gls model.
plot(e1 ~ sex_f, stat_data)
plot(e1 ~ log_est, stat_data)
plot(e1 ~ year_f, stat_data)
hist(e1, main = "Histogram of residuals") # looks ok
Moving forward with the varPower structure on log_est2 for the model selection exercise
mm1 <- lme(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, random = ~ 1 | rosa_f, method = "REML", weights = logest2Power, data = stat_data) # random intercept model
anova(M8, mm1) # likelihood ratio test to determine if the mixed effects model is preferred over the gls with varPower variance structure.
Adding a random intercept for RoSA does not improve the model. When forcing the 3 RoSA genotypes to have the same slope (same effect of estuary date on gonadosomatic index) there is no support for differing intercepts (i.e. all RoSA genotypes start with the same gonadosomatic index at day 0).
Lets see what happen when a random intercept and slope for RoSA are included. A random slope would indicate that the strength of the relationship between gonadosomatic index and estuary entry date is not the same among RoSA genotypes.
mm2 <- lme(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, random = ~ 1 + log_est | rosa_f, method = "REML", weights = logest2Power, data = stat_data)
anova(M8, mm2)
A random slope and intercept model is not preferred over the non-random model. (Note this does not differ in a different slope for log_est or log_est2 is used). I used both log_est and log_est2 (change code to “random = ~1+log_est2|RoSA”) and both models were not preferred over the gls.
Let’s see if the random slope (without random intercept) for RoSA is the most preferred model
mm3 <- lme(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, random = ~ 0 + log_est | rosa_f, method = "REML", weights = logest2Power, data = stat_data)
anova(M8, mm3)
No support for a random slope model. I used both log_est and log_est2 (change code to “random = ~0+log_est2|rosa_f”) and both models were not preferred over the gls. See code chunks below.
mm2a <- lme(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, random = ~ 1 + log_est2 | rosa_f, method = "REML", weights = logest2Power, data = stat_data)
anova(M8, mm2a)
mm3a <- lme(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, random = ~ 0 + log_est2 | rosa_f, method = "REML", weights = logest2Power, data = stat_data)
anova(M8, mm3a)
The random slope model isn’t preferred over the fixed effects model or the random slope and random intercept model.
Making sense of these results:
Julian Day is highly significant (it appears, need to do the model selection procedure–> see below)
Random intercept for RoSA not supported –> RoSA genotypes don’t start off at different gonadSI values when the slope relationship between estuary sampling date and gonadosomatic index is not allowed to vary for the three RoSA genotypes.
Random slope for RoSA not supported –> The effect size of estuary sampling date on gonadosomatic index is not different among RoSA genotypes when the intercept is constrained to be the same for all 3 genotypes.
Random slope and intercept model is not preferred over the GLS model.
mm1$coefficients
## $fixed
## (Intercept) log_est log_est2 year_f2010 sexm
## 1.6044820066 -0.6570168098 0.0679206845 -0.0009777401 -0.0138359664
##
## $random
## $random$rosa_f
## (Intercept)
## EEEEEEEE -8.915519e-20
## HHHHHHHH 2.361010e-19
## LLLLLLLL -1.469235e-19
mm3$coefficients
## $fixed
## (Intercept) log_est log_est2 year_f2010 sexm
## 1.6044820059 -0.6570168096 0.0679206845 -0.0009777401 -0.0138359664
##
## $random
## $random$rosa_f
## log_est
## EEEEEEEE -1.566720e-20
## HHHHHHHH 4.302565e-20
## LLLLLLLL -2.735887e-20
mm2$coefficients
## $fixed
## (Intercept) log_est log_est2 year_f2010 sexm
## 1.6044820057 -0.6570168095 0.0679206845 -0.0009777401 -0.0138359664
##
## $random
## $random$rosa_f
## (Intercept) log_est
## EEEEEEEE -4.636679e-20 -7.510387e-21
## HHHHHHHH 1.217988e-19 2.082072e-20
## LLLLLLLL -7.536735e-20 -1.332469e-20
M0 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logest2Power, data = stat_data)
M1 <- gls(gonadsi ~ 1 + log_est + log_est2 + sex, weights = logest2Power, data = stat_data) # model without year
M2 <- gls(gonadsi ~ 1 + log_est + log_est2 + year_f, weights = logest2Power, data = stat_data) # model without sex
M3 <- gls(gonadsi ~ 1 + log_est + year_f + sex, weights = logest2Power, data = stat_data) # model without log estuary date^2
M4 <- gls(gonadsi ~ 1 + log_est2 + year_f + sex, weights = logest2Power, data = stat_data) # model without log estuary date
fit0ml <- update(M0, . ~ ., method = "ML")
fit1ml <- update(M1, . ~ ., method = "ML")
fit2ml <- update(M2, . ~ ., method = "ML")
fit3ml <- update(M3, . ~ ., method = "ML")
fit4ml <- update(M4, . ~ ., method = "ML")
gonadsi ~ 1 + log_est + log_est2 + year_f + sex, weights = logest2Power, data = stat_data
LRT on year
anova(fit0ml, fit1ml) # year is not significant
LRT on sex
anova(fit0ml, fit2ml) # sex is significant
LRT on log_est^2
anova(fit0ml, fit3ml) # log estuary date squared is significant
LRT on log_est
anova(fit0ml, fit4ml) # log estuary date is significant
library(sjPlot)
tab_model(M0)
## 'r2()' does not support models of class 'gls'.
gonadsi | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 1.60 | 1.25 – 1.96 | <0.001 |
log_est | -0.66 | -0.79 – -0.52 | <0.001 |
log_est2 | 0.07 | 0.05 – 0.08 | <0.001 |
year_f [2010] | -0.00 | -0.00 – 0.00 | 0.110 |
sex [m] | -0.01 | -0.02 – -0.01 | <0.001 |
Observations | 502 |
(The error message here is irrelevant to printing the table we want.)
M1 <- gls(gonadsi ~ 1 + log_est + log_est2 + sex, weights = logest2Power, data = stat_data) # model without year
M2 <- gls(gonadsi ~ 1 + log_est + log_est2, weights = logest2Power, data = stat_data) # model without sex
M3 <- gls(gonadsi ~ 1 + log_est + sex, weights = logest2Power, data = stat_data) # model without log estuary date^2
M4 <- gls(gonadsi ~ 1 + log_est2 + sex, weights = logest2Power, data = stat_data) # model without log estuary date
fit1ml <- update(M1, . ~ ., method = "ML")
fit2ml <- update(M2, . ~ ., method = "ML")
fit3ml <- update(M3, . ~ ., method = "ML")
fit4ml <- update(M4, . ~ ., method = "ML")
gonadsi ~ 1 + log_est + log_est2 + sex, weights = logest2Power, data = stat_data
LRT on sex
anova(fit1ml, fit2ml) # sex is significant
LRT on log_est2
anova(fit1ml, fit3ml) # log estuary date squared is significant
LRT on log_est
anova(fit1ml, fit4ml) # log estuary date is significant
In conclusion, sex, log_est and log_est2 significantly influence gonadosomatic index.
tab_model(M1)
## 'r2()' does not support models of class 'gls'.
gonadsi | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 1.65 | 1.31 – 2.00 | <0.001 |
log_est | -0.68 | -0.81 – -0.54 | <0.001 |
log_est2 | 0.07 | 0.06 – 0.08 | <0.001 |
sex [m] | -0.01 | -0.01 – -0.01 | <0.001 |
Observations | 502 |
(The error message here is irrelevant to printing the table we want.)
For the effects of RoSA on gonadosomatic index, there is no evidence that a difference in intercept or slope occurs among the RoSA genotypes.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Sierra 10.12.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Denver
## date 2020-05-14
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
## backports 1.1.6 2020-04-05 [1] CRAN (R 3.6.2)
## bayestestR 0.6.0 2020-04-20 [1] CRAN (R 3.6.2)
## boot 1.3-23 2019-07-05 [2] CRAN (R 3.6.2)
## broom 0.5.6 2020-04-20 [1] CRAN (R 3.6.2)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.6.0)
## cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0)
## coda 0.19-3 2019-07-05 [1] CRAN (R 3.6.0)
## colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 3.6.0)
## dbplyr 1.4.3 2020-04-19 [1] CRAN (R 3.6.2)
## digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0)
## dplyr * 0.8.5 2020-03-07 [1] CRAN (R 3.6.0)
## effectsize 0.3.0 2020-04-11 [1] CRAN (R 3.6.2)
## ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0)
## emmeans 1.4.6 2020-04-19 [1] CRAN (R 3.6.2)
## estimability 1.3 2018-02-11 [1] CRAN (R 3.6.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0)
## farver 2.0.3 2020-01-16 [1] CRAN (R 3.6.0)
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 3.6.0)
## fs 1.4.1 2020-04-04 [1] CRAN (R 3.6.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.6.0)
## ggeffects 0.14.3 2020-04-20 [1] CRAN (R 3.6.2)
## ggplot2 * 3.3.0 2020-03-05 [1] CRAN (R 3.6.0)
## glue 1.4.0 2020-04-03 [1] CRAN (R 3.6.2)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.0)
## haven 2.2.0 2019-11-08 [1] CRAN (R 3.6.0)
## hms 0.5.3 2020-01-08 [1] CRAN (R 3.6.0)
## htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0)
## httr 1.4.1 2019-08-05 [1] CRAN (R 3.6.0)
## insight 0.8.3 2020-04-20 [1] CRAN (R 3.6.2)
## jsonlite 1.6.1 2020-02-02 [1] CRAN (R 3.6.0)
## knitr 1.28 2020-02-06 [1] CRAN (R 3.6.0)
## labeling 0.3 2014-08-23 [1] CRAN (R 3.6.0)
## lattice 0.20-38 2018-11-04 [2] CRAN (R 3.6.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.0)
## lme4 1.1-23 2020-04-07 [1] CRAN (R 3.6.2)
## lubridate 1.7.8 2020-04-06 [1] CRAN (R 3.6.2)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
## MASS 7.3-51.6 2020-04-26 [1] CRAN (R 3.6.2)
## Matrix 1.2-18 2019-11-27 [2] CRAN (R 3.6.2)
## minqa 1.2.4 2014-10-09 [1] CRAN (R 3.6.0)
## modelr 0.1.6 2020-02-22 [1] CRAN (R 3.6.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.0)
## mvtnorm 1.1-0 2020-02-24 [1] CRAN (R 3.6.0)
## nlme * 3.1-142 2019-11-07 [2] CRAN (R 3.6.2)
## nloptr 1.2.2.1 2020-03-11 [1] CRAN (R 3.6.0)
## parameters 0.6.1 2020-04-08 [1] CRAN (R 3.6.2)
## performance 0.4.5 2020-03-28 [1] CRAN (R 3.6.2)
## pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 3.6.2)
## R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.0)
## Rcpp 1.0.4 2020-03-17 [1] CRAN (R 3.6.0)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.6.0)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 3.6.0)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 3.6.0)
## rlang 0.4.5 2020-03-01 [1] CRAN (R 3.6.0)
## rmarkdown 2.1 2020-01-20 [1] CRAN (R 3.6.0)
## rstudioapi 0.11 2020-02-07 [1] CRAN (R 3.6.0)
## rvest 0.3.5 2019-11-08 [1] CRAN (R 3.6.0)
## scales 1.1.0 2019-11-18 [1] CRAN (R 3.6.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0)
## sjlabelled 1.1.3 2020-01-28 [1] CRAN (R 3.6.0)
## sjmisc 2.8.4 2020-04-03 [1] CRAN (R 3.6.2)
## sjPlot * 2.8.3 2020-03-09 [1] CRAN (R 3.6.0)
## sjstats 0.17.9 2020-02-06 [1] CRAN (R 3.6.0)
## statmod 1.4.34 2020-02-17 [1] CRAN (R 3.6.0)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
## tibble * 3.0.1 2020-04-20 [1] CRAN (R 3.6.2)
## tidyr * 1.0.2 2020-01-24 [1] CRAN (R 3.6.0)
## tidyselect 1.0.0 2020-01-27 [1] CRAN (R 3.6.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 3.6.0)
## vctrs 0.2.4 2020-03-10 [1] CRAN (R 3.6.0)
## withr 2.2.0 2020-04-20 [1] CRAN (R 3.6.2)
## xfun 0.13 2020-04-13 [1] CRAN (R 3.6.2)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 3.6.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 3.6.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0)
##
## [1] /Users/eriq/Library/R/3.6/library
## [2] /Library/Frameworks/R.framework/Versions/3.6/Resources/library
Running the code and rendering this notebook required approximately this much time on a Mac laptop of middling speed:
Sys.time() - start_time
## Time difference of 5.976112 secs