Hi Aki,

I’ve got a dynamic occupancy model with colonisation, emigration, and detection rates varying by region OR by sites nested within regions. There’s two approach for each configuration, one where the site and/or region effects are drawn from MVN and one where they are modeled using a latent variable corresponding to each site and/or region. I haven’t done full SBC on this model yet, but I have simulated data in R and parameters get recovered with HMC. I also manage to run the model with no to minimal divergent transitions when using the latent variable approach.

I just did some tests, and Pathfinder runs fine with MVN and latent variable approach as long as there’s no site effects included. That seems to break Pathfinder resulting in `NaN`

or `Inf`

`lp__`

, which means I can’t use any of the initial values for HMC. Again though, running HMC with `init = 0.1`

works fine, although the MVN has divergent transitions with this particular dataset.

I’m aware it’s a lot of code but I’ve cut out most of the fluff if you wanted to take a look. Note that the `data`

`h`

is an array of 3 indicators corresponding to (1) MVN vs latent variable (SEM), (2) the inclusion of a Hawkes process on call rates, and (3) including site effects without or with spatial GP.

```
functions {
/**
* Return log density of partial sum of regions of occupancy model
* See (transformed) data and parameters for arguments
*
* @return Partial sum log density
*/
real partial_sum_lpmf(
data array[] int seq_region, data int start, data int end, data array[] int h,
data array[] int n_site, data array[] int first_site, data array[] int last_site, data array[,] int n_date, data array[,,] int dates, data array[,,] int tau,
data array[,,] int n_call, data array[,,] vector y, data array[,] int q,
vector psi_r, vector gamma_r, vector epsilon_r, vector mu_r, matrix hp,
row_vector phi_raw, vector eta_t, vector eta_s_raw, vector iota_t, matrix iota_O_L, matrix iota_z,
vector iota_ell, array[] vector iota_ell_prior, data array[,] vector utm) {
// partial target and transition rate/log probability matrices
real ptarget = 0;
matrix[2, 2] trm, ltpm;
// log (one-minus) overall occupancy probabilities
int n_region = end - start + 1;
vector[n_region] log_psi_r = log(psi_r[start:end]);
vector[n_region] log1m_psi_r = log1m(psi_r[start:end]);
// for each region
for (r in start:end) {
// index mapping to partial regions
int rr = r - start + 1;
// containers
vector[n_site[r]] gamma, epsilon, mu, ssd_, log_ssd, log1m_ssd;
// site-level effects (SEM or MVN)
matrix[n_site[r], 3] iota;
array[n_site[r]] int site_idx = linspaced_int_array(n_site[r], first_site[r], last_site[r]);
if (h[3] > 0) {
if (h[1]) {
ptarget += normal_lupdf(eta_s_raw[site_idx] | 0, eta_t[2]);
iota = eta_s_raw[site_idx] * phi_raw;
} else {
ptarget += std_normal_lupdf(to_vector(iota_z[site_idx]));
iota = iota_z[site_idx] * diag_post_multiply(iota_O_L', iota_t);
}
// spatial GP
if (h[3] == 2) {
ptarget += inv_gamma_lupdf(iota_ell[r] | iota_ell_prior[r], iota_ell_prior[r]);
iota = cholesky_decompose(gp_exp_quad_cov(utm[r, 1:n_site[r]], 1, iota_ell[r])) * iota;
}
// colonisation, emigration, and call rates
gamma = exp(log(gamma_r[r]) + iota[:, 1]);
epsilon = exp(log(epsilon_r[r]) + iota[:, 2]);
mu = exp(log(mu_r[r]) + iota[:, 3]);
// region-level only
} else {
gamma = rep_vector(gamma_r[r], n_site[r]);
epsilon = rep_vector(epsilon_r[r], n_site[r]);
mu = rep_vector(mu_r[r], n_site[r]);
}
// log (one-minus) stable state occupancy probabilities
ssd_ = ssd(gamma, epsilon);
log_ssd = log(ssd_);
log1m_ssd = log1m(ssd_);
// for each site
for (s in 1:n_site[r]) {
// marginal log probabilities per state
matrix[2, n_date[r, s]] omega;
// for each date a SongMeter was active
for (t in 1:n_date[r, s]) {
// index mapping back to actual date
int tt = dates[r, s, t];
// log detection probabilities for each occupancy state when no calls were recorded
if (n_call[r, s, tt] == 0) {
omega[1, t] = 0;
omega[2, t] = exponential_lccdf(y[r, s, tt, 1] | mu[s]);
// when calls were recorded
} else {
omega[1, t] = negative_infinity();
// constant call rates
if (h[2] == 0) {
omega[2, t] = exponential_lupdf(y[r, s, tt, 1:n_call[r, s, tt]] | mu[s]);
omega[2, t] += exponential_lccdf(y[r, s, tt, n_call[r, s, tt] + 1] | mu[s]);
// Hawkes process
} else {
vector[n_call[r, s, tt] + 1] B, lambda;
B[1] = 0;
for (c in 1:n_call[r, s, tt]) {
B[c + 1] = (1 + B[c]) * exp(-hp[r, 2] * y[r, s, tt, c]);
}
lambda = mu[s] + hp[r, 1] * B;
omega[2, t] = exponential_lupdf(y[r, s, tt, 1:n_call[r, s, tt]] | lambda[1:n_call[r, s, tt]]);
omega[2, t] += exponential_lccdf(y[r, s, tt, n_call[r, s, tt] + 1] | lambda[n_call[r, s, tt] + 1]);
}
}
} // t
// marginal log probabilities at first date
omega[:, 1] += [ log1m_ssd[s], log_ssd[s] ]';
// TRM (transpose for matrix multiplication)
trm = [ [ -gamma[s], gamma[s] ],
[ epsilon[s], -epsilon[s] ] ]';
// LTPM and marginal log probabilities
ltpm = log(matrix_exp(trm));
for (t in 2:n_date[r, s]) {
if (tau[r, s, t - 1] > 1) {
omega[:, t] += log_product_exp(log(matrix_exp(trm * tau[r, s, t - 1])), omega[:, t - 1]);
}
omega[:, t] += log_product_exp(ltpm, omega[:, t - 1]);
} // t
// increment log density
if (q[r, s]) {
ptarget += log_psi_r[rr] + log_sum_exp(omega[:, n_date[r, s]]);
} else {
ptarget += log_sum_exp(log_psi_r[rr] + log_sum_exp(omega[:, n_date[r, s]]), log1m_psi_r[rr]);
}
} // s
} // r
return ptarget;
}
/**
* Return the natural logarithm of the product of the elementwise exponentiation of the specified matrices
*
* @param a First matrix or row_vector
* @param b Second matrix or row_vector
*
* @return log(exp(a) * exp(b))
*/
matrix log_product_exp(matrix a, matrix b) {
int x = rows(a);
int y = cols(b);
int z = cols(a);
matrix[z, x] a_tr = a';
matrix[x, y] c;
for (j in 1:y) {
for (i in 1:x) {
c[i, j] = log_sum_exp(a_tr[:, i] + b[:, j]);
}
}
return c;
}
vector log_product_exp(matrix a, vector b) {
int x = rows(a);
int z = cols(a);
matrix[z, x] a_tr = a';
vector[x] c;
for (i in 1:x) {
c[i] = log_sum_exp(a_tr[:, i] + b);
}
return c;
}
real log_product_exp(row_vector a, vector b) {
real c = log_sum_exp(a' + b);
return c;
}
row_vector log_product_exp(row_vector a, matrix b) {
int y = cols(b);
vector[size(a)] a_tr = a';
row_vector[y] c;
for (j in 1:y) {
c[j] = log_sum_exp(a_tr + b[:, j]);
}
return c;
}
/**
* Return a vector of inverse gamma parameters where the probability mass is contained within
* pre-specified lower and upper bounds, using Stan's algebraic solver. See
* https://betanalpha.github.io/assets/case_studies/some_containment_prior_models.html
*
* @param params_guess Best guess of the shape and scale for the solver to start
* @param theta Lower and upper bounds of input
* @param tails Lower and upper bounds of probability mass
* @param x_i Integer array (must be included even if empty)
*
* @return Vector of inverse gamma shapes
*/
vector inv_gamma_prior(vector guess, vector theta, array[] real tails, array[] int x_i) {
vector[2] params;
params[1] = inv_gamma_cdf(theta[1] | guess[1], guess[2]) - tails[1];
params[2] = inv_gamma_cdf(theta[2] | guess[1], guess[2]) - tails[2];
return params;
}
/**
* Return number of correlations from number of dimensions of correlation matrix
*
* @param n_dim Number of dimensions of correlation matrix
*
* @return Number of correlations
*/
int n_cor(int n_dim) {
return (n_dim * (n_dim - 1)) %/% 2;
}
/**
* Return vector of correlations from lower triangular Cholesky factor of correlation matrix
*
* @param Omega_L Lower triangular Cholesky factor of correlation matrix
*
* @return Vector of correlations
*/
vector cors(matrix Omega_L) {
int n_dim = rows(Omega_L);
int n_rho = n_cor(n_dim);
vector[n_rho] rho;
matrix[n_dim, n_dim] Omega = multiply_lower_tri_self_transpose(Omega_L);
int r = 1;
for (k in 1:(n_dim - 1)) {
for (kk in (k + 1):n_dim) {
rho[r] = Omega[k, kk];
r = r + 1;
}
}
return rho;
}
/**
* Return stable state probability from colonisation and emigration rates
*
* @param gamma Colonisation rate
* @param epsilon Emigration rate
*
* @return Stable state probability
*/
real ssd(real gamma, real epsilon) {
real gamma_prob = 1 - exp(-gamma);
real epsilon_prob = 1 - exp(-epsilon);
return gamma_prob / (epsilon_prob + gamma_prob);
}
vector ssd(vector gamma, vector epsilon) {
int n = size(gamma);
vector[n] gamma_prob = 1 - exp(-gamma);
vector[n] epsilon_prob = 1 - exp(-epsilon);
return gamma_prob ./ (epsilon_prob + gamma_prob);
}
}
data {
int n_region, n_site_max, n_date_max, n_call_max, grainsize; // indexing and grainsize for reduce_sum()
array[n_region] int<lower=1> n_site; // number of sites per region
array[n_region, n_site_max] int n_date; // number of dates per site
array[n_region, n_site_max, n_date_max] int dates, n_call; // number of calls and date indices per site and date
array[n_region, n_site_max, n_date_max] vector[n_call_max + 1] y; // detection histories (calls) per site and date
array[n_region, n_site_max] vector[2] utm; // UTM coordinates per site
array[n_region] vector[2] dist; // min. and max. site distances per region
array[3] int<lower=0, upper=2> h; // indicators for model complexity
int<lower=0, upper=1> sbc; // indicator for performing simulation based calibration (SBC)
int<lower=1> night_hours, n_call_max_sim; // number of nights hours and maximum number of calls for SBC
}
transformed data {
array[n_region] int seq_region = linspaced_int_array(n_region, 1, n_region); // sequence of regions for reduce_sum()
int n_site_sum = sum(n_site); // total number of sites
array[n_region] int first_site, last_site; // indices of first and last site per region for n_site_sum-length vector
array[n_region, n_site_max] int q = rep_array(0, n_region, n_site_max); // Boolean for any calls per site
array[n_region, n_site_max, n_date_max - 1] int tau; // survey data intervals
for (r in 1:n_region) {
last_site[r] = sum(n_site[1:r]);
first_site[r] = last_site[r] - n_site[r] + 1;
for (s in 1:n_site[r]) {
q[r, s] = sum(n_call[r, s]) > 0;
for (t in 2:n_date[r, s]) {
tau[r, s, t - 1] = dates[r, s, t] - dates[r, s, t - 1];
}
} // s
} // r
}
parameters {
// intercepts
real<lower=0, upper=1> psi_a; // overal occupancy intercept
real<lower=0> gamma_a, epsilon_a, mu_a; // colonisation, emigration, and baseline call rate intercepts
// SEM region-level (occupancy, colonisation, emigration, call rate)
row_vector[h[1] ? 3 : 0] phi_raw; // SEM loadings N(0, 1)
vector<lower=0>[!h[1] ? 0 : (h[3] > 0) ? 2 : 1] eta_t; // SEM quantities scales
vector<multiplier=(h[1] ? eta_t[1] : 1)>[h[1] ? n_region : 0] eta_r_raw; // region-level SEM quantities z-scores
vector<lower=0>[h[1] ? 1 : 4] theta_t; // region-level MVN SDs
cholesky_factor_corr[h[1] ? 0 : 4] theta_O_L; // region-level MVN Cholesky factor of correlation matrix
matrix[n_region, h[1] ? 1 : 4] theta_z; // region-level MVN z-scores
// Hawkes process
row_vector<lower=0>[(h[2] > 0) ? 2 : 0] hp_a; // intercepts for alpha and beta
vector<lower=0>[(h[2] == 2) ? 2 : 0] hp_t; // region-level MVN SDs
cholesky_factor_corr[(h[2] == 2) ? 2 : 0] hp_O_L; // region-level MVN Cholesky factor of correlation matrix
matrix[n_region, (h[2] == 2) ? 2 : 0] hp_z; // region-level MVN z-scores
// site-level (colonisation, emigration, call rate)
vector<multiplier=((h[1] && h[3] > 0) ? eta_t[2] : 1)>[(h[1] && h[3] > 0) ? n_site_sum : 0] eta_s_raw; // site-level SEM quantities
vector<lower=0>[h[1] ? 0 : (h[3] == 0) ? 0 : 3] iota_t; // site-level MVN SDs
cholesky_factor_corr[h[1] ? 0 : (h[3] == 0) ? 0 : 3] iota_O_L; // site-level MVN Cholesky factor of correlation matrix
matrix[h[1] ? 0 : (h[3] == 0) ? 0 : n_site_sum, 3] iota_z; // site-level MVN z-scores
vector<lower=0>[(h[3] == 2) ? n_region : 0] iota_ell; // spatial GP length scales
}
transformed parameters {
// SEM loadings and region-level parameters
vector[n_region] psi_r, gamma_r, epsilon_r, mu_r;
{
matrix[n_region, 4] theta;
if (h[1]) {
theta[:, 1] = theta_z[:, 1] * theta_t[1]; // univariate normal for psi
theta[:, 2:4] = eta_r_raw * phi_raw;
} else {
theta = theta_z * diag_post_multiply(theta_O_L', theta_t);
}
psi_r = inv_logit(logit(psi_a) + theta[:, 1]);
gamma_r = exp(log(gamma_a) + theta[:, 2]);
epsilon_r = exp(log(epsilon_a) + theta[:, 3]);
mu_r = exp(log(mu_a) + theta[:, 4]);
}
// Hawkes process (intercepts or MVN)
matrix[n_region, (h[2] == 2) ? 2 : 0] hp;
{
hp = rep_matrix(hp_a, n_region);
if (h[2] == 2) {
hp = exp(log(hp) + hp_z * diag_post_multiply(hp_O_L', hp_t));
}
}
}
model {
// intercepts
target += beta_lupdf(psi_a | 4, 2);
target += exponential_lupdf(gamma_a | 2);
target += exponential_lupdf(epsilon_a | 2);
target += exponential_lupdf(mu_a | 2);
// region-level priors
target += exponential_lupdf(theta_t | 2);
target += std_normal_lupdf(to_vector(theta_z));
if (h[1]) {
target += std_normal_lupdf(phi_raw);
target += exponential_lupdf(eta_t | 1);
target += normal_lupdf(eta_r_raw | 0, eta_t[1]);
} else {
target += lkj_corr_cholesky_lupdf(theta_O_L | 2);
}
// Hawkes process priors
if (h[2] > 0) {
target += exponential_lupdf(hp_a | 4);
if (h[2] == 2) {
target += exponential_lupdf(hp_t | 1);
target += lkj_corr_cholesky_lupdf(hp_O_L | 2);
target += std_normal_lupdf(to_vector(hp_z));
}
}
// site-level MVN priors
if (!h[1] && h[3] > 0) {
target += exponential_lupdf(iota_t | 2);
target += lkj_corr_cholesky_lupdf(iota_O_L | 2);
}
// likelihood (threading)
if (sbc) {
target += reduce_sum(partial_sum_lupmf, seq_region, grainsize, h,
n_site, first_site, last_site, n_date, dates, tau,
n_call_sim, y_sim, q_sim,
psi_r, gamma_r, epsilon_r, mu_r, hp,
phi_raw, eta_t, eta_s_raw, iota_t, iota_O_L, iota_z,
iota_ell, iota_ell_prior, utm);
} else {
target += reduce_sum(partial_sum_lupmf, seq_region, grainsize, h,
n_site, first_site, last_site, n_date, dates, tau,
n_call, y, q,
psi_r, gamma_r, epsilon_r, mu_r, hp,
phi_raw, eta_t, eta_s_raw, iota_t, iota_O_L, iota_z,
iota_ell, iota_ell_prior, utm);
}
}
```