5 Modelling fitness

5.1 Introduction

Now that we have mutation load (total, homozygous and heterozygous) estimates for each individual, based on SnpEff and GERP, we can model their effects on lifetime mating success (LMS).

Here, we build three sets of models:

The effect of total load on LMS
The effects of homo- and heterozygous load on LMS
The direct and indirect effects of total load on mating success through the sexual traits

We use Bayesian GLMMs using the R package ‘brms’ to compute these models

5.2 Methods

5.2.1 Total load

The general structure of the total load models is as follows:

LMS ~ scale(total_load) + core + (1|site)

To test for inbreeding depression, we ran a similar model: LMS ~ scale(froh) + core + (1|site)

This is what the script for each load type looks like:

Code

# load pheno data lifetime
load(file = "output/load/pheno_loads_lifetime.RData")

# load mutation load measures
load("output/load/all_loads_combined_da_nosex_29scaf_plus_per_region.RData") #loads no sex chr only 30 scaf

# subset only the relevant method/loadtype
load <- load_per_region %>% filter(loadtype == method)

# combine
pheno_wide_load <- left_join(pheno_wide, load, by = "id")
pheno_wide_load <- subset(pheno_wide_load, !is.na(total_load)) #some ids without genotypes, excluded for wgr

#### model ####
brm_load_t <- brm(LMS_min ~ scale(total_load) + core + (1|site), data = pheno_wide_load,
                  family = "zero_inflated_poisson",
                  prior = prior(normal(0,1), class = b),
                  cores =8, control = list(adapt_delta = 0.99, max_treedepth = 15),
                  iter = iter, thin = thin, warmup = warm, seed = 1908)

save(brm_load_t, file = out)

We can check out the performance of each model using the following loop:

Code

output_total <- list.files(path = "output/models/total_hom_het/",
                         pattern = "lms*", full.names=T)

diagnose_summary <- list()
for (i in 1:length(output)){
  #load fit
  load(file = output[[i]])
  #get posteriors
  posterior <- as.array(fit)
  log_ps <- log_posterior(fit)
  nuts <- nuts_params(fit) #divergence
  #get only beta and sd
  betas <- variables(fit)[grep("b_", variables(fit))]
  sd <- variables(fit)[grep("sd_", variables(fit))]
  
  #global patterns in divergence
  diverge_beta <- mcmc_parcoord(posterior, np = nuts, pars= betas)
  diverge_sd <- mcmc_parcoord(posterior, np = nuts, pars= sd)
  
  #identify collinearity between parameters
  collin_beta <- mcmc_pairs(posterior, np = nuts, pars= betas)
  collin_sd <- mcmc_pairs(posterior, np = nuts, pars= sd)
  
  #traceplot
  trace_beta <- mcmc_trace(posterior, pars = betas, np = nuts)
  trace_sd <- mcmc_trace(posterior, pars = sd, np = nuts)
  
  #rhat
  rhat <- mcmc_rhat(brms::rhat(fit))
  
  #effective sample size
  neff <- mcmc_neff(neff_ratio(fit))
  
  #autocorrelation
  autocor_beta <- mcmc_acf(posterior, pars = betas)
  autocor_sd <- mcmc_acf(posterior, pars=sd)
  
  #quick glance results
  areas <- mcmc_areas(fit, pars=betas)
  
  #combine in list
  diagnosis <- list(diverge_beta = diverge_beta, 
                    diverge_sd = diverge_sd, 
                    collin_beta = collin_beta, 
                    collin_sd = collin_sd, 
                    trace_beta = trace_beta, 
                    trace_sd = trace_sd, 
                    rhat = rhat, 
                    neff = neff, 
                    autocor_beta = autocor_beta, 
                    autocor_sd = autocor_sd,
                    areas = areas)
  
  
  modelname <- sub(".*/", "", output[i]) 
  modelname <- sub(".RData", "", modelname)
  
  # add to summary
  diagnose_summary[[modelname]] <- diagnosis
}

This is what these plots look like for total GERP load effects:

Diagnostics GERP total load model

5.2.2 Hom and het load

The general structure of the homozygous and heterozygous load models is as follows:

LMS ~ scale(het_load) + scale(hom_load) + core + (1|site)

This is what the script for each load type looks like:

Code

# load pheno data lifetime
load(file = "output/load/pheno_loads_lifetime.RData")

# load mutation load measures
load("output/load/all_loads_combined_da_nosex_29scaf_plus_per_region.RData") #loads no sex chr only 30 scaf

# subset only the relevant method/loadtype
load <- load_per_region %>% filter(loadtype == method)

# combine
pheno_wide_load <- left_join(pheno_wide, load, by = "id")
pheno_wide_load <- subset(pheno_wide_load, !is.na(total_load)) #some ids without genotypes, excluded for wgr

#### model ####
brm_load_het_hom <- brm(LMS_min ~ scale(het_load) + scale(hom_load) + core + (1|site), data = pheno_wide_load,
                  family = "zero_inflated_poisson",
                  prior = prior(normal(0,1), class = b),
                  cores =8, control = list(adapt_delta = 0.99, max_treedepth = 15),
                  iter = iter, thin = thin, warmup = warm, seed = 1908)

save(brm_load_het_hom, file = out)

The same diagnostics were applied to each model.

5.2.3 Direct and indirect effects

Next, we build models that are based on annual values. There are two sets of models: the first quantifies the effect of load on the six sexual traits (attendance, fighting rate, centrality, lyre size, blue chroma and red eye comb size) in six separate models. The second set analyses the effect of the six traits and load on annual mating success (MS).

The direct effect is the effect of load on MS while correcting for all mediators. The indirect effect is calculated using a mediation analysis, where this effect is calculated as the product of the effect of the predictor (the total load) on the mediator (the sexual trait) and the effect of the mediator on the response variable (AMS).

5.2.3.1 Set 1: load on traits

For each trait, we run this model:

scale(trait) ~ scale(total_load) + age_cat + (1|year) + (1|site/id)

Code

### load data ###

load(file = "data/phenotypes/phenotypes_annual.RData")

# load mutation load measures
load("output/load/all_loads_combined_da_nosex_29scaf_plus_per_region.RData") #loads no sex chr only 30 scaf

# subset only the relevant method/loadtype
load <- load_per_region %>% filter(loadtype == method)

# merge files
pheno <- left_join(pheno_long, load, by = "id")
pheno$born <- pheno$year - pheno$age
pheno <- pheno %>% mutate(age_cat = as.factor(case_when(age == 1 ~ "yearling", age > 1  ~ "adult")))

### modelling ####
formula <- formula(paste0("scale(", response, ") ~ scale(total_load) + age_cat + (1|year) + (1|site/id)"))

fit <- brm(formula,
            family = "gaussian",
           data = pheno, 
           cores =8,
           control = list(adapt_delta = 0.99, max_treedepth = 15),
           prior = prior(normal(0,1), class = b),
           iter = iterations, 
           thin = thin, warmup = burn)

save(fit, file = out)

5.2.3.2 Set 2: trait + load on MS

Then, for both approaches we run the following model:

MS ~ scale(total_load) + scale(lyre) + scale(eyec) + scale(blue) + scale(dist) + scale(attend) + scale(fight) + age_cat + (1|year) + (1|site/id)

Code

### load data ###

load(file = "data/phenotypes/phenotypes_annual.RData")

# load mutation load measures
load("output/load/all_loads_combined_da_nosex_29scaf_plus_per_region.RData") #loads no sex chr only 30 scaf

# subset only the relevant method/loadtype
load <- load_per_region %>% filter(loadtype == method)

# merge files
pheno <- left_join(pheno_long, load, by = "id")
pheno$born <- pheno$year - pheno$age

pheno <- pheno %>% mutate(age_cat = as.factor(case_when(age == 1 ~ "yearling", age > 1  ~ "adult")))

### modelling ####
formula <- formula("MS ~ scale(total_load) + scale(lyre) + scale(eyec) + scale(blue) + scale(dist) + scale(attend) + scale(fight) + age_cat + (1|year) + (1|site/id)")

fit <- brm(formula,
           family = "zero_inflated_poisson",
           data = pheno, 
           cores =8,
           control = list(adapt_delta = 0.99, max_treedepth = 15),
           prior = prior(normal(0,1), class = b),
           iter = iterations, 
           thin = thin, warmup = burn)

save(fit, file = out)

5.2.3.3 Direct / indirect effects

The direct/indirect effects are then calculated after loading in all model outputs:

Code

### load packages ####

pacman::p_load(brms, bayesplot, dplyr, data.table)

### load models ###

#### gerp ####
load(file = "output/models/annual/traits/model_attend_gerp45.RData")
fit_gerp_attend <- fit
load(file = "output/models/annual/traits/model_fight_gerp45.RData")
fit_gerp_fight <- fit
load(file = "output/models/annual/traits/model_dist_gerp45.RData")
fit_gerp_dist <- fit
load(file = "output/models/annual/traits/model_eyec_gerp45.RData")
fit_gerp_eyec <- fit
load(file = "output/models/annual/traits/model_blue_gerp45.RData")
fit_gerp_blue <- fit
load(file = "output/models/annual/traits/model_lyre_gerp45.RData")
fit_gerp_lyre <- fit

load(file = "output/models/annual/ams/model_trait_ams_gerp45.RData")
fit_gerp_ams <- fit

rm(fit)

### snpeff 
load(file = "output/models/annual/traits/model_attend_high.RData")
fit_high_attend <- fit
load(file = "output/models/annual/traits/model_fight_high.RData")
fit_high_fight <- fit
load(file = "output/models/annual/traits/model_dist_high.RData")
fit_high_dist <- fit
load(file = "output/models/annual/traits/model_eyec_high.RData")
fit_high_eyec <- fit
load(file = "output/models/annual/traits/model_blue_high.RData")
fit_high_blue <- fit
load(file = "output/models/annual/traits/model_lyre_high.RData")
fit_high_lyre <- fit

load(file = "output/models/annual/ams/model_trait_ams_high.RData")
fit_high_ams <- fit

rm(fit)

### indirect effects loop #####
get_indirect <- function(mediator, method, trait_model, ams_model){
  treatment = "b_scaletotal_load"
  path1 <- as_draws_df(trait_model, variable =treatment)
  path1 <- path1$b_scaletotal_load
  
  path2 <- as_draws_df(ams_model, variable = mediator)
  path2 <- unlist(c(path2[,1]))
  
  indirect <- path1*path2
  
  direct <- as_draws_df(ams_model, variable =treatment)
  direct <- direct$b_scaletotal_load
  
  total <- indirect + direct
  
  effect_attend <- data.frame(treatment = treatment,
                              mediator = mediator,
                              method = method,
                              indirect_median = round(median(indirect), 2),
                              indirect_lower = round(quantile(indirect, probs = c(.025)), 2),
                              indirect_upper = round(quantile(indirect, probs = c(.975)), 2),
                              direct_median = round(median(direct), 2),
                              direct_lower = round(quantile(direct, probs = c(.025)), 2),
                              direct_upper = round(quantile(direct, probs = c(.975)), 2),
                              total_median = round(median(total), 2),
                              total_lower = round(quantile(total, probs = c(.025)), 2),
                              total_upper = round(quantile(total, probs = c(.975)), 2),
                              path1_median = round(median(path1), 2),
                              path1_lower = round(quantile(path1, probs = c(.025)), 2),
                              path1_upper = round(quantile(path1, probs = c(.975)), 2),
                              path2_median = round(median(path2), 2),
                              path2_lower = round(quantile(path2, probs = c(.025)), 2),
                              path2_upper = round(quantile(path2, probs = c(.975)), 2),
                              indirect_lower_80 = round(quantile(indirect, probs = c(.1)), 2),
                              indirect_upper_80 = round(quantile(indirect, probs = c(.9)), 2),
                              direct_lower_80 = round(quantile(direct, probs = c(.1)), 2),
                              direct_upper_80 = round(quantile(direct, probs = c(.9)), 2))
  
  return(effect_attend)
}

effects <- data.frame(rbind(get_indirect(mediator="b_scalelyre", method = "gerp45", 
                                         trait_model=fit_gerp_lyre, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scaleeyec",  method = "gerp45", 
                                         trait_model=fit_gerp_eyec, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scaleblue",  method = "gerp45", 
                                         trait_model=fit_gerp_blue, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scaleattend",  method = "gerp45", 
                                         trait_model=fit_gerp_attend, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scalefight",  method = "gerp45", 
                                         trait_model=fit_gerp_fight, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scaledist",  method = "gerp45", 
                                         trait_model=fit_gerp_dist, ams_model = fit_gerp_ams),
                            get_indirect(mediator="b_scalelyre", method = "high", 
                                         trait_model=fit_high_lyre, ams_model = fit_high_ams),
                            get_indirect(mediator="b_scaleeyec",  method = "high", 
                                         trait_model=fit_high_eyec, ams_model = fit_high_ams),
                            get_indirect(mediator="b_scaleblue",  method = "high", 
                                         trait_model=fit_high_blue, ams_model = fit_high_ams),
                            get_indirect(mediator="b_scaleattend",  method = "high", 
                                         trait_model=fit_high_attend, ams_model = fit_high_ams),
                            get_indirect(mediator="b_scalefight",  method = "high", 
                                         trait_model=fit_high_fight, ams_model = fit_high_ams),
                            get_indirect(mediator="b_scaledist",  method = "high", 
                                         trait_model=fit_high_dist, ams_model = fit_high_ams)))


write.csv(effects, file = "output/models/annual/direct_indirect_summary.csv", quote=F, row.names = F)

5.3 Results

We find significant effects of total GERP and total SnpEff load, and significant effects of inbreeding.

Code

library(readxl); library(dplyr); library(kableExtra)
totals <- read.csv("../output/models/intervals/total_gerp45_high.csv")
totals %>% kbl()

parameter	outer_width	inner_width	point_est	ll	l	m	h	hh	model
b_scaletotal_load	0.95	0.8	median	-0.27	-0.25	-0.21	-0.17	-0.14	GERP
b_scaletotal_load	0.95	0.8	median	-0.18	-0.16	-0.11	-0.06	-0.04	SnpEff

We also find significant effects of both hom and het GERP and SnpEff load:

Code

homhet <- read.csv("../output/models/intervals/hom_het_gerp45_high.csv")
homhet %>% kbl()

parameter	outer_width	inner_width	point_est	ll	l	m	h	hh	model	loadtype
b_scalehom_load	0.95	0.8	median	-0.76	-0.70	-0.57	-0.45	-0.39	Hom	GERP
b_scalehet_load	0.95	0.8	median	-0.78	-0.72	-0.60	-0.48	-0.41	Het	GERP
b_scalehom_load	0.95	0.8	median	-0.17	-0.15	-0.09	-0.04	-0.01	Hom	SnpEff
b_scalehet_load	0.95	0.8	median	-0.24	-0.21	-0.15	-0.09	-0.06	Het	SnpEff

Here you can find the posterior distributions of model set 1 (load on traits) for GERP : MS set 1 results GERP

And for model set 2 (traits on MS) for GERP and SnpEff: MS set 2 results GERP MS set 2 results SnpEff

We can check out the result of the direct and indirect effects as follows:

Code

effects <- read.csv("../output/models/annual/direct_indirect_summary.csv")
effects %>% kbl() %>%  kable_classic_2() %>% scroll_box(width = "99%", height = "200px")

treatment	mediator	method	indirect_median	indirect_lower	indirect_upper	direct_median	direct_lower	direct_upper	total_median	total_lower	total_upper	path1_median	path1_lower	path1_upper	path2_median	path2_lower	path2_upper	indirect_lower_80	indirect_upper_80	direct_lower_80	direct_upper_80
b_scaletotal_load	b_scalelyre	gerp45	-0.01	-0.04	0.01	-0.13	-0.36	0.11	-0.14	-0.38	0.10	-0.03	-0.10	0.04	0.31	-0.07	0.69	-0.03	0.01	-0.29	0.03
b_scaletotal_load	b_scaleeyec	gerp45	0.00	-0.02	0.02	-0.13	-0.36	0.11	-0.13	-0.36	0.11	0.00	-0.09	0.09	0.13	-0.21	0.48	-0.01	0.01	-0.29	0.03
b_scaletotal_load	b_scaleblue	gerp45	0.00	-0.03	0.01	-0.13	-0.36	0.11	-0.14	-0.38	0.10	-0.03	-0.13	0.08	0.15	-0.05	0.36	-0.02	0.01	-0.29	0.03
b_scaletotal_load	b_scaleattend	gerp45	-0.13	-0.28	-0.01	-0.13	-0.36	0.11	-0.26	-0.54	0.01	-0.10	-0.19	-0.01	1.32	0.71	2.01	-0.22	-0.05	-0.29	0.03
b_scaletotal_load	b_scalefight	gerp45	0.00	-0.02	0.02	-0.13	-0.36	0.11	-0.13	-0.36	0.11	0.01	-0.09	0.12	-0.06	-0.33	0.21	-0.01	0.01	-0.29	0.03
b_scaletotal_load	b_scaledist	gerp45	-0.02	-0.11	0.05	-0.13	-0.36	0.11	-0.15	-0.41	0.11	0.04	-0.09	0.16	-0.59	-0.93	-0.23	-0.07	0.02	-0.29	0.03
b_scaletotal_load	b_scalelyre	high	0.01	-0.01	0.05	-0.11	-0.38	0.16	-0.09	-0.37	0.18	0.04	-0.04	0.10	0.33	-0.08	0.72	0.00	0.03	-0.27	0.06
b_scaletotal_load	b_scaleeyec	high	0.00	-0.03	0.02	-0.11	-0.38	0.16	-0.11	-0.38	0.16	-0.01	-0.10	0.07	0.14	-0.21	0.49	-0.01	0.01	-0.27	0.06
b_scaletotal_load	b_scaleblue	high	-0.01	-0.04	0.01	-0.11	-0.38	0.16	-0.12	-0.39	0.15	-0.07	-0.17	0.04	0.15	-0.06	0.36	-0.03	0.00	-0.27	0.06
b_scaletotal_load	b_scaleattend	high	-0.03	-0.16	0.09	-0.11	-0.38	0.16	-0.14	-0.44	0.15	-0.03	-0.11	0.06	1.32	0.72	1.94	-0.11	0.04	-0.27	0.06
b_scaletotal_load	b_scalefight	high	0.00	-0.01	0.02	-0.11	-0.38	0.16	-0.10	-0.38	0.17	-0.02	-0.12	0.08	-0.06	-0.32	0.20	-0.01	0.01	-0.27	0.06
b_scaletotal_load	b_scaledist	high	0.01	-0.06	0.09	-0.11	-0.38	0.16	-0.10	-0.39	0.17	-0.01	-0.14	0.11	-0.57	-0.94	-0.22	-0.04	0.06	-0.27	0.06