I am trying to build a hierarchical, two-level occupancy model. The model estimates the correlation between species detections using a \Sigma matrix, similar to the model in SUG 1.13.
The model works works until I scale up to greater than ~5 to 10 covariates. This post ( -nan results with lkj_corr_cholesky - Developers - The Stan Forums (mc-stan.org)) describes why.
Looking through the answers, I see an example onion method file, in -nan results with lkj_corr_cholesky - #3 by bgoodri. However, I cannot figure out how to include the onion method within my model. For example, if I want to use the a matrix similar to cholesky_factor_corr
, where do I create this matrix? The transformed parameter section or the model section? The example uses the generated quantities section, but I am using this matrix later in the model.
Here are my questions:
- Is the onion method the best approach?
- Is there another method that would work better?
- Are there any other ways to optimize my code?
One end application I have for this model is to estimate an intercept for each species and then compare the correlation for site occupancy across species.
Here is my example model that complies and works until I add too many species:
data {
// sampling unit-level occupancy covariates
int<lower = 1> n_unit; // number of units (e.g., site and time combos)
int<lower = 1> m_psi; // number of psi predictors
matrix[n_unit, m_psi] x_psi; // psi preditors
int<lower = 1> j_psi; // number of unit (psi) groups
int<lower = 1,upper = j_psi> jj_psi[n_unit]; // group id for psi_level
int<lower = 1> m_psi_star; // number of psi hyper-predictor
matrix[j_psi, m_psi_star] x_psi_star; // psi hyper-predictor matrix
// sample-level detection covariates
int<lower = 1> total_observations; //number of total, raw observations
int<lower = 1> m_p; // number of p predictors
matrix[total_observations, m_p] v_p; // p preidctors
int<lower = 1> j_p; // number of sample (p) groups
int<lower = 1,upper = j_p> jj_p[total_observations]; // group id for p_level
int<lower = 1> m_p_star; // number of p hypter-predictors
matrix[j_psi, m_p_star] v_p_star; // p hyper-predictor matirx
// survey level information
int<lower = 0, upper = 1> y[total_observations]; //raw count observations
int<lower = 0, upper = total_observations> start_idx[n_unit]; //observation index
int<lower = 0, upper = total_observations> end_idx[n_unit]; //observation index
// summary of whether species is known to be present at each unit
int<lower = 0, upper = 1> any_seen[n_unit]; //detection at sample-level?
// number of surveys at each unit
int<lower = 0> n_samples[n_unit]; // number of samples per unit
// priors
matrix[m_psi_star, m_psi] beta_psi_star_prior;
real<lower=0> beta_star_sd_prior;
matrix[m_p_star, m_p] delta_p_star_prior;
real<lower=0> delta_star_sd_prior;
}
parameters {
matrix[m_psi, j_psi] z_psi;
matrix[m_p, j_p] z_p;
matrix[m_psi_star, m_psi] beta_psi_star;
matrix[m_p_star, m_p] delta_p_star;
real<lower=0> sigma_psi;
real<lower=0> sigma_p;
cholesky_factor_corr[m_psi] L_Omega_psi;
vector<lower=0, upper= pi()/2>[m_psi] tau_psi_unif;
cholesky_factor_corr[m_p] L_Omega_p;
vector<lower=0, upper= pi()/2>[m_p] tau_p_unif;
vector[n_unit] logit_psi;
vector[total_observations] logit_p;
}
transformed parameters {
vector<lower=0>[m_psi] tau_psi; // prior scale
vector<lower=0>[m_psi] tau_p; // prior scale
matrix[j_psi, m_psi] beta_psi;
matrix[j_p, m_p] delta_p;
for (k in 1:m_psi) tau_psi[k] = 2.5 * tan(tau_psi_unif[k]);
for (k in 1:m_p) tau_p[k] = 2.5 * tan(tau_p_unif[k]);
beta_psi = x_psi_star * beta_psi_star + (diag_pre_multiply(tau_psi, L_Omega_psi) * z_psi)';
delta_p = v_p_star * delta_p_star + (diag_pre_multiply(tau_p, L_Omega_p) * z_p)';
}
model {
vector[n_unit] log_psi = log_inv_logit(logit_psi);
vector[n_unit] log1m_psi = log1m_inv_logit(logit_psi);
to_vector(delta_p_star) ~ normal(to_vector(delta_p_star_prior), delta_star_sd_prior);
to_vector(beta_psi_star) ~ normal(to_vector(beta_psi_star_prior), beta_star_sd_prior);
logit_p ~ normal(rows_dot_product(delta_p[jj_p], v_p), sigma_p);
logit_psi ~ normal(rows_dot_product(beta_psi[jj_psi], x_psi), sigma_psi);
to_vector(z_psi) ~ std_normal();
L_Omega_psi ~ lkj_corr_cholesky(2);
to_vector(z_p) ~ std_normal();
L_Omega_p ~ lkj_corr_cholesky(2);
for (idx in 1:n_unit) {
if (n_samples[idx] > 0) {
if (any_seen[idx]) {
// unit is occupied
target += log_psi[idx]
+ bernoulli_logit_lpmf(y[start_idx[idx]:end_idx[idx]] |
logit_p[start_idx[idx]:end_idx[idx]]);
} else {
// unit may or may not be occupied
target += log_sum_exp(
log_psi[idx] + bernoulli_logit_lpmf(y[start_idx[idx]:end_idx[idx]] |
logit_p[start_idx[idx]:end_idx[idx]]),
log1m_psi[idx]
);
}
}
}
}
generated quantities {
matrix[m_psi, m_psi] Omega_psi;
matrix[m_p, m_p] Omega_p;
Omega_psi = multiply_lower_tri_self_transpose(L_Omega_psi);
Omega_p = multiply_lower_tri_self_transpose(L_Omega_p);
}
Here is example code to use the model (assuming you save the model as occupancy_2_corr.stan
):
memory.limit(32497*5)
library(tidyverse)
library(GGally)
library(rstan)
library(rstan)
options(mc.cores = parallel::detectCores())
set.seed(1234)
n_chain = 4
## Define function inputs
n_units = 5
n_unit_revisit = 5
n_spp = 3
n_samples = 6
unit_obs <-
tibble(unit_mean = rnorm(n_units, mean = 0, sd = 1)) %>%
rowid_to_column("unit")
## 1b creates spp correlation error
beta_psi_variance <- abs(stats::rnorm(n_spp, mean = 0.20, sd = 0.1))
beta_psi_covariance <- stats::rnorm((n_spp^2 - n_spp)/2,
mean = 0, sd = 0.25)
sigma_beta <- matrix(NA, nrow = n_spp, ncol = n_spp)
diag(sigma_beta) <- beta_psi_variance
sigma_beta[base::lower.tri(sigma_beta)] <- beta_psi_covariance
sigma_beta[base::upper.tri(sigma_beta)] <- beta_psi_covariance
## ensure matrix is positive def
sigma_beta <-
t(sigma_beta) %*% sigma_beta
sigma_beta_sim <-
data.frame(MASS::mvrnorm(n = n_units,
mu = rep(0, n_spp), Sigma = sigma_beta))
colnames(sigma_beta_sim) <- paste0("Spp_", seq_len(n_spp))
unit_obs_spp <- dplyr::bind_cols(unit_obs, sigma_beta_sim)
unit_obs_long <-
tidyr::pivot_longer(unit_obs_spp,
cols = starts_with("Spp_"),
names_to = "Species",
values_to = "spp_error") %>%
dplyr::mutate(unit_response = spp_error,
psi = plogis(unit_response))
unit_subtable <-
expand_grid(
unit = seq_len(n_units),
revisit_id = seq_len(n_unit_revisit)
)
## data should include non-detects
unit_all <-
unit_subtable %>%
dplyr::full_join(unit_obs_long, by = "unit") %>%
dplyr::mutate(z_sim = stats::rbinom(n(), size = 1, p = psi))
unit_data_summary <-
unit_all %>%
dplyr::group_by(unit, Species) %>%
dplyr::summarize(n = dplyr::n(),
psi_sim = mean(psi),
psi_obs = mean(z_sim), .groups = "keep") %>%
dplyr::arrange(unit, Species) %>%
dplyr::mutate(unit_spp = factor(paste(unit, Species, sep = "-")),
unit_spp_id = as.numeric(unit_spp))
# unit_data_group <-
# unit_all %>%
# dplyr::group_by(unit) %>%
# dplyr::summarize(n = dplyr::n(), .groups = "keep")
#############################
## Simulate sample_level data
sample_obs <- expand_grid(unit = seq_len(n_units))
delta_p_variance <- abs(stats::rnorm(n_spp, mean = 0.25, sd = 0.1))
delta_p_covariance <- stats::rnorm((n_spp^2 - n_spp)/2,
mean = 0, sd = 0.25)
sigma_delta <- matrix(NA, nrow = n_spp, ncol = n_spp)
diag(sigma_delta) <- delta_p_variance
sigma_delta[base::lower.tri(sigma_delta)] <- delta_p_covariance
sigma_delta[base::upper.tri(sigma_delta)] <- delta_p_covariance
## ensure matrix is positive def
sigma_delta <-
t(sigma_delta) %*% sigma_delta
sigma_delta_sim <-
data.frame(MASS::mvrnorm(n = n_units,
mu = rep(0, n_spp), Sigma = sigma_delta))
colnames(sigma_delta_sim) <- paste0("Spp_", seq_len(n_spp))
sample_obs_spp <- dplyr::bind_cols(sample_obs, sigma_delta_sim)
sample_obs_long <-
tidyr::pivot_longer(sample_obs_spp,
cols = starts_with("Spp_"),
names_to = "Species",
values_to = "spp_error") %>%
dplyr::mutate(sample_response = spp_error,
p = plogis(sample_response))
sample_obs_longer <-
expand_grid(unit = seq_len(n_units),
revisit_id = seq_len(n_unit_revisit),
sample_id = seq_len(n_samples)) %>%
full_join(sample_obs_long, by = "unit") %>%
full_join(unit_all %>%select(unit, revisit_id, Species, z_sim),
by = c("unit", "revisit_id", "Species")) %>%
mutate(y = z_sim * stats::rbinom(n(), size = 1, p))
sample_obs_longer_summary <-
sample_obs_longer %>%
dplyr::group_by(unit, Species) %>%
dplyr::summarize(n = n(),
total_y = sum(y),
.groups = "keep") %>%
dplyr::mutate(any_seen = ifelse(total_y > 0, 1, 0))
# sample_unit_summary <-
# sample_obs_longer_summary %>%
# dplyr::group_by(unit) %>%
# dplyr::summarize(n = n(),
# total_y = sum(total_y),
# .groups = "keep")
## merge to get any_seen
unit_all <-
unit_all %>%
full_join(sample_obs_longer_summary %>%
select(unit, Species, any_seen, n),
by = c("unit", "Species")) %>%
dplyr::arrange(unit, Species) %>%
mutate(unit_spp = factor(paste(unit, Species, sep = "-")),
unit_spp_id = as.numeric(unit_spp))
sample_obs_longer <-
sample_obs_longer %>%
arrange(unit, Species, revisit_id) %>%
mutate(unit_spp = factor(paste(unit, Species, sep = "-")),
unit_spp_revisit = factor(paste(unit, Species, revisit_id, sep = "-")),
unit_spp_id = as.numeric(unit_spp),
unit_spp_revisit_id = as.numeric(unit_spp_revisit)) %>%
rowid_to_column("index")
sample_obs_idx <-
sample_obs_longer %>%
group_by(unit_spp_revisit, unit_spp_revisit_id) %>%
summarize(n = n(),
start_idx = min(index),
end_idx = max(index), .groups = "keep")
#---
n_unit = nrow(unit_all)
jj_psi = unit_all %>% pull(unit_spp_id)
x_psi = model.matrix(~ unit_spp - 1, unit_all)
m_psi = ncol(x_psi)
j_psi = length(unique(jj_psi))
x_psi_star = model.matrix(~ 1, unit_data_summary)
m_psi_star = ncol(x_psi_star)
total_observations = nrow(sample_obs_longer)
v_p = model.matrix( ~ unit_spp - 1, sample_obs_longer)
m_p = ncol(v_p)
jj_p = sample_obs_longer %>% pull(unit_spp_id)
j_p = length(unique(jj_p))
v_p_star = model.matrix(~ 1, sample_obs_longer_summary)
m_p_star = ncol(v_p_star)
y = sample_obs_longer %>% pull(y)
start_idx = sample_obs_idx %>% pull(start_idx)
end_idx = sample_obs_idx %>% pull(end_idx)
any_seen = unit_all %>% pull(any_seen)
n_samples = unit_all %>% pull(n)
beta_psi_star_prior = matrix(0, nrow = m_psi_star, ncol = m_psi)
beta_star_sd_prior = 1
delta_p_star_prior = matrix(0, nrow = m_p_star, ncol = m_p)
delta_star_sd_prior = 1## stan inputs
stan_data <-
list(n_unit = n_unit,
jj_psi = jj_psi,
x_psi = x_psi,
m_psi = m_psi,
j_psi = j_psi,
x_psi_star = x_psi_star,
m_psi_star = m_psi_star,
total_observations = total_observations,
v_p = v_p,
m_p = m_p,
jj_p = jj_p,
j_p = j_p,
v_p_star = v_p_star,
m_p_star = m_p_star,
y = y,
start_idx = start_idx,
end_idx = end_idx,
any_seen = any_seen,
n_samples = n_samples,
prior_beta_psi = 0,
prior_beta_psi_sd = 1,
prior_delta_p = 0,
prior_delta_p_sd = 1,
beta_psi_star_prior = beta_psi_star_prior,
beta_star_sd_prior = beta_star_sd_prior,
delta_p_star_prior = delta_p_star_prior,
delta_star_sd_prior = delta_star_sd_prior)
test_mod <- rstan::stan_model("./inst/stan/occupancy_2_corr.stan")
stan_out_2 <- rstan::sampling(test_mod, stan_data,
iter = 100, chain = 1)
Also, unless I set init = "0"
, I get this error message when I scale up the model:
Chain 2: Rejecting initial value:
Chain 2: Log probability evaluates to log(0), i.e. negative infinity.
Chain 2: Stan can’t start sampling from this initial value.