Hello everyone,

I am currently working on a project involving Bayesian model averaging using the brms package in R, and I’ve encountered a challenge that I hope to get some insights on. I have a list of brmsfit models stored by species in all_species_models, and for each species, I’ve determined the models with substantial support based on WAIC, which are listed in all_species_waic_info.

For now, I am focusing on getting the averaging to work on 1/45 species, “Common.wombat”, and I’ve successfully extracted the names of the models that have substantial support. My next step is to retrieve the actual brmsfit model objects corresponding to these names from all_species_models. Here’s where I need guidance: I understand that the posterior_average function in brms requires individual brmsfit objects as arguments (and not a list of these objects). How can I ensure that I am correctly retrieving and providing these brmsfit models to the function?

When I try to run the posterior_average function, I get an error message saying “cannot allocate vector of size 60.4 MB”, despite having plenty of available RAM. It is causing R or my computer to crash. This is happening on multiple computers,both 64-bit systems. I’ve also checked the size of the individual models, and they are all well below the size of the memory that should be available.

My aim to conduct model average coefficients from supported models, specifying “waic” as the method for weighting the models in the averaging process using the weights argument. I want to ensure that this process is executed correctly and efficiently, avoiding any potential memory issues or crashes.

I would greatly appreciate any advice, best practices, or code snippets that could guide me through this process and help ensure that I am handling the brmsfit objects correctly for posterior averaging. Thank you in advance for your time and assistance!

```
devtools::install_github("paul-buerkner/brms")
install.packages(c("StanHeaders", "rstan"), repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
library(lme4)
library(AICcmodavg)
library(brms)
library(ggplot2)
library(bayestestR)
library(openxlsx)
# Set your working directory
setwd("~/Documents/PhD/Data/Landscape data/Data")
data<-read.csv("data_frame_landscapes_time.csv")
data$period <- as.factor(data$period)
# Create a new workbook
wb <- createWorkbook()
# List of species
species_list <- c(
"Common.wombat"
)
# 1. Models
# Outer list to store models of all species
all_species_models <- list()
# List to store all warnings across all species
warning_list <- list()
# Function to handle warnings for the current species
handle_warning <- function(w, model_name, species_name) {
named_warning <- list(species_name = species_name, model_name = model_name, warning = w)
warning_list <<- c(warning_list, list(named_warning))
invokeRestart("muffleWarning")
}
# List to store coefficient summaries and R2 values for all species
all_species_coefs_and_r2 <- list()
# Loop over each species
for (species_name in species_list) {
# Add a new worksheet for this species
addWorksheet(wb, species_name)
# Set species_data to the original data dataframe as each species has its own column
species_data <- data
# List to store all models for the current species
all_models <- list()
# List to store coefficient summaries and R2 values for the current species
species_coefs_and_r2 <- list()
# Variable names for main effects and interactions
vars_main <- c("unburnt", "high_and_very_high", "shannon_fire_severity")
vars_inter <- paste0(vars_main, ":period")
# Predefined mapping for model name prefixes
model_name_prefixes <- c(
unburnt = "modelu",
high_and_very_high = "modelhvh",
shannon_fire_severity = "models"
)
# Loop to run all models for the current species
for (i in seq_along(vars_main)) {
# Model with main effect
model_formula_main <- as.formula(paste(
species_name, " ~ scale(", vars_main[i], ") +
scale(distance_to_perimetre) +
scale(shannon_fire_size_diversity) +
scale(ndvi) + scale(terrain_ruggedness) + scale(elevation) +
factor(period) +
(1+", vars_main[i], "|location) + (1|landscape) + (1|Obs) + offset(log(total_deployment_days_period))",
sep = ""
))
# Updated line for specified model names:
model_name_main <- model_name_prefixes[vars_main[i]]
withCallingHandlers({
model <- brm(
formula = model_formula_main,
family = poisson,
data = species_data,
prior = c(
prior(student_t(4, 0, 1), class = "b"), # for fixed effects
prior(student_t(4, 0, 1), class = "sd") # for random effect standard deviations
),
save_pars = save_pars(all = TRUE),
control = list(adapt_delta = 0.9999, max_treedepth = 15),
chains = 4, iter = 3000, warmup = 1000
)
}, warning = function(w) { handle_warning(w, model_name_main, species_name) })
all_models[[model_name_main]] <- model
# Coefficient summaries and R2 for the main effect model
coefs = summary(model)$fixed
coefs89 = summary(model, prob = .89)$fixed[,3:4]
coefs95 = summary(model, prob = .95)$fixed[,3:4]
coefs = cbind(coefs, coefs89, coefs95)
r2 = bayes_R2(model); names(r2) = c("R2_est", "R2_error", "R2_Q2.5", "R2_Q97.5")
coefs$species = species_name
coefs$model_name = model_name_main
coefs$covariate = rownames(coefs)
coefs = cbind(coefs, r2)
species_coefs_and_r2[[model_name_main]] <- coefs
# Model with interaction
model_formula_inter <- as.formula(paste(
species_name, " ~ scale(", vars_main[i], ") +
scale(distance_to_perimetre) +
scale(shannon_fire_size_diversity) +
scale(ndvi) + scale(terrain_ruggedness) + scale(elevation) +
factor(period)*scale(", vars_main[i], ") +
(1+", vars_main[i], "|location) + (1|landscape) + (1|Obs) + offset(log(total_deployment_days_period))",
sep = ""
))
# Updated line for specified model names:
model_name_inter <- paste0(model_name_prefixes[vars_main[i]], "i")
withCallingHandlers({
model <- brm(
formula = model_formula_inter,
family = poisson,
data = species_data,
prior = c(
prior(student_t(4, 0, 1), class = "b"), # for fixed effects
prior(student_t(4, 0, 1), class = "sd") # for random effect standard deviations
),
save_pars = save_pars(all = TRUE),
control = list(adapt_delta = 0.9999, max_treedepth = 15),
chains = 4, iter = 3000, warmup = 1000
)
}, warning = function(w) { handle_warning(w, model_name_inter, species_name) })
all_models[[model_name_inter]] <- model
# Coefficient summaries and R2 for the interaction model
coefs = summary(model)$fixed
coefs89 = summary(model, prob = .89)$fixed[,3:4]
coefs95 = summary(model, prob = .95)$fixed[,3:4]
coefs = cbind(coefs, coefs89, coefs95)
r2 = bayes_R2(model); names(r2) = c("R2_est", "R2_error", "R2_Q2.5", "R2_Q97.5")
coefs$species = species_name
coefs$model_name = model_name_inter
coefs$covariate = rownames(coefs)
coefs = cbind(coefs, r2)
species_coefs_and_r2[[model_name_inter]] <- coefs
}
# Model without any fire severity variable
withCallingHandlers({
modeln <- brm(
as.formula(paste(species_name, " ~
scale(distance_to_perimetre) +
scale(shannon_fire_size_diversity) +
scale(ndvi) + scale(terrain_ruggedness) + scale(elevation) +
factor(period) +
(1|location) + (1|landscape) + (1|Obs) + offset(log(total_deployment_days_period))")),
family = poisson,
data = species_data,
prior = c(
prior(student_t(4, 0, 1), class = "b"), # for fixed effects
prior(student_t(4, 0, 1), class = "sd") # for random effect standard deviations
),
save_pars = save_pars(all = TRUE),
control = list(adapt_delta = 0.9999, max_treedepth = 15),
chains = 4, iter = 3000, warmup = 1000
)
}, warning = function(w) { handle_warning(w, "modeln", species_name) })
all_models[["modeln"]] <- modeln
# Coefficient summaries and R2 for the model without any fire severity variable
coefs = summary(modeln)$fixed
coefs89 = summary(modeln, prob = .89)$fixed[,3:4]
coefs95 = summary(modeln, prob = .95)$fixed[,3:4]
coefs = cbind(coefs, coefs89, coefs95)
r2 = bayes_R2(modeln); names(r2) = c("R2_est", "R2_error", "R2_Q2.5", "R2_Q97.5")
coefs$species = species_name
coefs$model_name = "modeln"
coefs$covariate = rownames(coefs)
coefs = cbind(coefs, r2)
species_coefs_and_r2[["modeln"]] <- coefs
# Store all models for the current species in the outer list
all_species_models[[species_name]] <- all_models
# Store coefficient summaries and R2 values for the current species in the outer list
all_species_coefs_and_r2[[species_name]] <- species_coefs_and_r2
}
# 2. WAIC
# Outer list to store WAIC information for all species
all_species_waic_info <- list()
# Loop over each species
for (species_name in names(all_species_models)) {
# Extract models for the current species
species_models <- all_species_models[[species_name]]
# List to store WAIC for each model
waic_values <- list()
# Calculate WAIC for all models of the current species
for (model_name in names(species_models)) {
model <- species_models[[model_name]]
waic_values[[model_name]] <- waic(model)$estimates[3, 1]
}
# Find the model with the lowest WAIC
reference_model <- names(which.min(unlist(waic_values)))
reference_waic <- waic_values[[reference_model]]
# Calculate the difference from the reference model
waic_diff <- unlist(waic_values) - reference_waic
# Identify models with substantial support (difference <= 2)
substantial_support <- names(waic_diff[waic_diff <= 2])
# Create a list to store WAIC information for the current species
species_waic_info <- list(
reference_model = reference_model,
reference_waic = reference_waic,
waic_diff = waic_diff,
substantial_support = substantial_support
)
# Store WAIC information for the current species in the outer list
all_species_waic_info[[species_name]] <- species_waic_info
}
# Print the WAIC information
print(all_species_waic_info)
# 3. Model Averaging
# Function to perform model averaging
perform_model_averaging <- function(species_name, all_species_models, all_species_waic_info) {
# Extract supported models based on WAIC
supported_models <- all_species_waic_info[[species_name]]$substantial_support
# Extract brmsfit models for averaging
brms_models <- lapply(supported_models, function(model_name) {
return(all_species_models[[species_name]][[model_name]])
})
# Check the class of each model (they should all be 'brmsfit')
sapply(brms_models, class)
# Perform model averaging
averaged_posterior <- do.call(posterior_average, c(brms_models, list(weights = "waic")))
# Summarize averaged posterior draws
summary_averaged_posterior <- posterior_summary(averaged_posterior)
# Return the summary
return(summary_averaged_posterior)
}
# Apply Model Averaging for Each Species
all_species_averaged_posterior <- list()
for (species_name in names(all_species_models)) {
all_species_averaged_posterior[[species_name]] <- perform_model_averaging(species_name, all_species_models, all_species_waic_info)
}
# Print the Averaged Posterior Summaries
print(all_species_averaged_posterior)
```

edited by @jsocolar for R syntax highlighting.