Hi Aki,
I’ve got a dynamic occupancy model with colonisation, emigration, and detection rates varying by region OR by sites nested within regions. There’s two approach for each configuration, one where the site and/or region effects are drawn from MVN and one where they are modeled using a latent variable corresponding to each site and/or region. I haven’t done full SBC on this model yet, but I have simulated data in R and parameters get recovered with HMC. I also manage to run the model with no to minimal divergent transitions when using the latent variable approach.
I just did some tests, and Pathfinder runs fine with MVN and latent variable approach as long as there’s no site effects included. That seems to break Pathfinder resulting in NaN
or Inf
lp__
, which means I can’t use any of the initial values for HMC. Again though, running HMC with init = 0.1
works fine, although the MVN has divergent transitions with this particular dataset.
I’m aware it’s a lot of code but I’ve cut out most of the fluff if you wanted to take a look. Note that the data
h
is an array of 3 indicators corresponding to (1) MVN vs latent variable (SEM), (2) the inclusion of a Hawkes process on call rates, and (3) including site effects without or with spatial GP.
functions {
/**
* Return log density of partial sum of regions of occupancy model
* See (transformed) data and parameters for arguments
*
* @return Partial sum log density
*/
real partial_sum_lpmf(
data array[] int seq_region, data int start, data int end, data array[] int h,
data array[] int n_site, data array[] int first_site, data array[] int last_site, data array[,] int n_date, data array[,,] int dates, data array[,,] int tau,
data array[,,] int n_call, data array[,,] vector y, data array[,] int q,
vector psi_r, vector gamma_r, vector epsilon_r, vector mu_r, matrix hp,
row_vector phi_raw, vector eta_t, vector eta_s_raw, vector iota_t, matrix iota_O_L, matrix iota_z,
vector iota_ell, array[] vector iota_ell_prior, data array[,] vector utm) {
// partial target and transition rate/log probability matrices
real ptarget = 0;
matrix[2, 2] trm, ltpm;
// log (one-minus) overall occupancy probabilities
int n_region = end - start + 1;
vector[n_region] log_psi_r = log(psi_r[start:end]);
vector[n_region] log1m_psi_r = log1m(psi_r[start:end]);
// for each region
for (r in start:end) {
// index mapping to partial regions
int rr = r - start + 1;
// containers
vector[n_site[r]] gamma, epsilon, mu, ssd_, log_ssd, log1m_ssd;
// site-level effects (SEM or MVN)
matrix[n_site[r], 3] iota;
array[n_site[r]] int site_idx = linspaced_int_array(n_site[r], first_site[r], last_site[r]);
if (h[3] > 0) {
if (h[1]) {
ptarget += normal_lupdf(eta_s_raw[site_idx] | 0, eta_t[2]);
iota = eta_s_raw[site_idx] * phi_raw;
} else {
ptarget += std_normal_lupdf(to_vector(iota_z[site_idx]));
iota = iota_z[site_idx] * diag_post_multiply(iota_O_L', iota_t);
}
// spatial GP
if (h[3] == 2) {
ptarget += inv_gamma_lupdf(iota_ell[r] | iota_ell_prior[r], iota_ell_prior[r]);
iota = cholesky_decompose(gp_exp_quad_cov(utm[r, 1:n_site[r]], 1, iota_ell[r])) * iota;
}
// colonisation, emigration, and call rates
gamma = exp(log(gamma_r[r]) + iota[:, 1]);
epsilon = exp(log(epsilon_r[r]) + iota[:, 2]);
mu = exp(log(mu_r[r]) + iota[:, 3]);
// region-level only
} else {
gamma = rep_vector(gamma_r[r], n_site[r]);
epsilon = rep_vector(epsilon_r[r], n_site[r]);
mu = rep_vector(mu_r[r], n_site[r]);
}
// log (one-minus) stable state occupancy probabilities
ssd_ = ssd(gamma, epsilon);
log_ssd = log(ssd_);
log1m_ssd = log1m(ssd_);
// for each site
for (s in 1:n_site[r]) {
// marginal log probabilities per state
matrix[2, n_date[r, s]] omega;
// for each date a SongMeter was active
for (t in 1:n_date[r, s]) {
// index mapping back to actual date
int tt = dates[r, s, t];
// log detection probabilities for each occupancy state when no calls were recorded
if (n_call[r, s, tt] == 0) {
omega[1, t] = 0;
omega[2, t] = exponential_lccdf(y[r, s, tt, 1] | mu[s]);
// when calls were recorded
} else {
omega[1, t] = negative_infinity();
// constant call rates
if (h[2] == 0) {
omega[2, t] = exponential_lupdf(y[r, s, tt, 1:n_call[r, s, tt]] | mu[s]);
omega[2, t] += exponential_lccdf(y[r, s, tt, n_call[r, s, tt] + 1] | mu[s]);
// Hawkes process
} else {
vector[n_call[r, s, tt] + 1] B, lambda;
B[1] = 0;
for (c in 1:n_call[r, s, tt]) {
B[c + 1] = (1 + B[c]) * exp(-hp[r, 2] * y[r, s, tt, c]);
}
lambda = mu[s] + hp[r, 1] * B;
omega[2, t] = exponential_lupdf(y[r, s, tt, 1:n_call[r, s, tt]] | lambda[1:n_call[r, s, tt]]);
omega[2, t] += exponential_lccdf(y[r, s, tt, n_call[r, s, tt] + 1] | lambda[n_call[r, s, tt] + 1]);
}
}
} // t
// marginal log probabilities at first date
omega[:, 1] += [ log1m_ssd[s], log_ssd[s] ]';
// TRM (transpose for matrix multiplication)
trm = [ [ -gamma[s], gamma[s] ],
[ epsilon[s], -epsilon[s] ] ]';
// LTPM and marginal log probabilities
ltpm = log(matrix_exp(trm));
for (t in 2:n_date[r, s]) {
if (tau[r, s, t - 1] > 1) {
omega[:, t] += log_product_exp(log(matrix_exp(trm * tau[r, s, t - 1])), omega[:, t - 1]);
}
omega[:, t] += log_product_exp(ltpm, omega[:, t - 1]);
} // t
// increment log density
if (q[r, s]) {
ptarget += log_psi_r[rr] + log_sum_exp(omega[:, n_date[r, s]]);
} else {
ptarget += log_sum_exp(log_psi_r[rr] + log_sum_exp(omega[:, n_date[r, s]]), log1m_psi_r[rr]);
}
} // s
} // r
return ptarget;
}
/**
* Return the natural logarithm of the product of the elementwise exponentiation of the specified matrices
*
* @param a First matrix or row_vector
* @param b Second matrix or row_vector
*
* @return log(exp(a) * exp(b))
*/
matrix log_product_exp(matrix a, matrix b) {
int x = rows(a);
int y = cols(b);
int z = cols(a);
matrix[z, x] a_tr = a';
matrix[x, y] c;
for (j in 1:y) {
for (i in 1:x) {
c[i, j] = log_sum_exp(a_tr[:, i] + b[:, j]);
}
}
return c;
}
vector log_product_exp(matrix a, vector b) {
int x = rows(a);
int z = cols(a);
matrix[z, x] a_tr = a';
vector[x] c;
for (i in 1:x) {
c[i] = log_sum_exp(a_tr[:, i] + b);
}
return c;
}
real log_product_exp(row_vector a, vector b) {
real c = log_sum_exp(a' + b);
return c;
}
row_vector log_product_exp(row_vector a, matrix b) {
int y = cols(b);
vector[size(a)] a_tr = a';
row_vector[y] c;
for (j in 1:y) {
c[j] = log_sum_exp(a_tr + b[:, j]);
}
return c;
}
/**
* Return a vector of inverse gamma parameters where the probability mass is contained within
* pre-specified lower and upper bounds, using Stan's algebraic solver. See
* https://betanalpha.github.io/assets/case_studies/some_containment_prior_models.html
*
* @param params_guess Best guess of the shape and scale for the solver to start
* @param theta Lower and upper bounds of input
* @param tails Lower and upper bounds of probability mass
* @param x_i Integer array (must be included even if empty)
*
* @return Vector of inverse gamma shapes
*/
vector inv_gamma_prior(vector guess, vector theta, array[] real tails, array[] int x_i) {
vector[2] params;
params[1] = inv_gamma_cdf(theta[1] | guess[1], guess[2]) - tails[1];
params[2] = inv_gamma_cdf(theta[2] | guess[1], guess[2]) - tails[2];
return params;
}
/**
* Return number of correlations from number of dimensions of correlation matrix
*
* @param n_dim Number of dimensions of correlation matrix
*
* @return Number of correlations
*/
int n_cor(int n_dim) {
return (n_dim * (n_dim - 1)) %/% 2;
}
/**
* Return vector of correlations from lower triangular Cholesky factor of correlation matrix
*
* @param Omega_L Lower triangular Cholesky factor of correlation matrix
*
* @return Vector of correlations
*/
vector cors(matrix Omega_L) {
int n_dim = rows(Omega_L);
int n_rho = n_cor(n_dim);
vector[n_rho] rho;
matrix[n_dim, n_dim] Omega = multiply_lower_tri_self_transpose(Omega_L);
int r = 1;
for (k in 1:(n_dim - 1)) {
for (kk in (k + 1):n_dim) {
rho[r] = Omega[k, kk];
r = r + 1;
}
}
return rho;
}
/**
* Return stable state probability from colonisation and emigration rates
*
* @param gamma Colonisation rate
* @param epsilon Emigration rate
*
* @return Stable state probability
*/
real ssd(real gamma, real epsilon) {
real gamma_prob = 1 - exp(-gamma);
real epsilon_prob = 1 - exp(-epsilon);
return gamma_prob / (epsilon_prob + gamma_prob);
}
vector ssd(vector gamma, vector epsilon) {
int n = size(gamma);
vector[n] gamma_prob = 1 - exp(-gamma);
vector[n] epsilon_prob = 1 - exp(-epsilon);
return gamma_prob ./ (epsilon_prob + gamma_prob);
}
}
data {
int n_region, n_site_max, n_date_max, n_call_max, grainsize; // indexing and grainsize for reduce_sum()
array[n_region] int<lower=1> n_site; // number of sites per region
array[n_region, n_site_max] int n_date; // number of dates per site
array[n_region, n_site_max, n_date_max] int dates, n_call; // number of calls and date indices per site and date
array[n_region, n_site_max, n_date_max] vector[n_call_max + 1] y; // detection histories (calls) per site and date
array[n_region, n_site_max] vector[2] utm; // UTM coordinates per site
array[n_region] vector[2] dist; // min. and max. site distances per region
array[3] int<lower=0, upper=2> h; // indicators for model complexity
int<lower=0, upper=1> sbc; // indicator for performing simulation based calibration (SBC)
int<lower=1> night_hours, n_call_max_sim; // number of nights hours and maximum number of calls for SBC
}
transformed data {
array[n_region] int seq_region = linspaced_int_array(n_region, 1, n_region); // sequence of regions for reduce_sum()
int n_site_sum = sum(n_site); // total number of sites
array[n_region] int first_site, last_site; // indices of first and last site per region for n_site_sum-length vector
array[n_region, n_site_max] int q = rep_array(0, n_region, n_site_max); // Boolean for any calls per site
array[n_region, n_site_max, n_date_max - 1] int tau; // survey data intervals
for (r in 1:n_region) {
last_site[r] = sum(n_site[1:r]);
first_site[r] = last_site[r] - n_site[r] + 1;
for (s in 1:n_site[r]) {
q[r, s] = sum(n_call[r, s]) > 0;
for (t in 2:n_date[r, s]) {
tau[r, s, t - 1] = dates[r, s, t] - dates[r, s, t - 1];
}
} // s
} // r
}
parameters {
// intercepts
real<lower=0, upper=1> psi_a; // overal occupancy intercept
real<lower=0> gamma_a, epsilon_a, mu_a; // colonisation, emigration, and baseline call rate intercepts
// SEM region-level (occupancy, colonisation, emigration, call rate)
row_vector[h[1] ? 3 : 0] phi_raw; // SEM loadings N(0, 1)
vector<lower=0>[!h[1] ? 0 : (h[3] > 0) ? 2 : 1] eta_t; // SEM quantities scales
vector<multiplier=(h[1] ? eta_t[1] : 1)>[h[1] ? n_region : 0] eta_r_raw; // region-level SEM quantities z-scores
vector<lower=0>[h[1] ? 1 : 4] theta_t; // region-level MVN SDs
cholesky_factor_corr[h[1] ? 0 : 4] theta_O_L; // region-level MVN Cholesky factor of correlation matrix
matrix[n_region, h[1] ? 1 : 4] theta_z; // region-level MVN z-scores
// Hawkes process
row_vector<lower=0>[(h[2] > 0) ? 2 : 0] hp_a; // intercepts for alpha and beta
vector<lower=0>[(h[2] == 2) ? 2 : 0] hp_t; // region-level MVN SDs
cholesky_factor_corr[(h[2] == 2) ? 2 : 0] hp_O_L; // region-level MVN Cholesky factor of correlation matrix
matrix[n_region, (h[2] == 2) ? 2 : 0] hp_z; // region-level MVN z-scores
// site-level (colonisation, emigration, call rate)
vector<multiplier=((h[1] && h[3] > 0) ? eta_t[2] : 1)>[(h[1] && h[3] > 0) ? n_site_sum : 0] eta_s_raw; // site-level SEM quantities
vector<lower=0>[h[1] ? 0 : (h[3] == 0) ? 0 : 3] iota_t; // site-level MVN SDs
cholesky_factor_corr[h[1] ? 0 : (h[3] == 0) ? 0 : 3] iota_O_L; // site-level MVN Cholesky factor of correlation matrix
matrix[h[1] ? 0 : (h[3] == 0) ? 0 : n_site_sum, 3] iota_z; // site-level MVN z-scores
vector<lower=0>[(h[3] == 2) ? n_region : 0] iota_ell; // spatial GP length scales
}
transformed parameters {
// SEM loadings and region-level parameters
vector[n_region] psi_r, gamma_r, epsilon_r, mu_r;
{
matrix[n_region, 4] theta;
if (h[1]) {
theta[:, 1] = theta_z[:, 1] * theta_t[1]; // univariate normal for psi
theta[:, 2:4] = eta_r_raw * phi_raw;
} else {
theta = theta_z * diag_post_multiply(theta_O_L', theta_t);
}
psi_r = inv_logit(logit(psi_a) + theta[:, 1]);
gamma_r = exp(log(gamma_a) + theta[:, 2]);
epsilon_r = exp(log(epsilon_a) + theta[:, 3]);
mu_r = exp(log(mu_a) + theta[:, 4]);
}
// Hawkes process (intercepts or MVN)
matrix[n_region, (h[2] == 2) ? 2 : 0] hp;
{
hp = rep_matrix(hp_a, n_region);
if (h[2] == 2) {
hp = exp(log(hp) + hp_z * diag_post_multiply(hp_O_L', hp_t));
}
}
}
model {
// intercepts
target += beta_lupdf(psi_a | 4, 2);
target += exponential_lupdf(gamma_a | 2);
target += exponential_lupdf(epsilon_a | 2);
target += exponential_lupdf(mu_a | 2);
// region-level priors
target += exponential_lupdf(theta_t | 2);
target += std_normal_lupdf(to_vector(theta_z));
if (h[1]) {
target += std_normal_lupdf(phi_raw);
target += exponential_lupdf(eta_t | 1);
target += normal_lupdf(eta_r_raw | 0, eta_t[1]);
} else {
target += lkj_corr_cholesky_lupdf(theta_O_L | 2);
}
// Hawkes process priors
if (h[2] > 0) {
target += exponential_lupdf(hp_a | 4);
if (h[2] == 2) {
target += exponential_lupdf(hp_t | 1);
target += lkj_corr_cholesky_lupdf(hp_O_L | 2);
target += std_normal_lupdf(to_vector(hp_z));
}
}
// site-level MVN priors
if (!h[1] && h[3] > 0) {
target += exponential_lupdf(iota_t | 2);
target += lkj_corr_cholesky_lupdf(iota_O_L | 2);
}
// likelihood (threading)
if (sbc) {
target += reduce_sum(partial_sum_lupmf, seq_region, grainsize, h,
n_site, first_site, last_site, n_date, dates, tau,
n_call_sim, y_sim, q_sim,
psi_r, gamma_r, epsilon_r, mu_r, hp,
phi_raw, eta_t, eta_s_raw, iota_t, iota_O_L, iota_z,
iota_ell, iota_ell_prior, utm);
} else {
target += reduce_sum(partial_sum_lupmf, seq_region, grainsize, h,
n_site, first_site, last_site, n_date, dates, tau,
n_call, y, q,
psi_r, gamma_r, epsilon_r, mu_r, hp,
phi_raw, eta_t, eta_s_raw, iota_t, iota_O_L, iota_z,
iota_ell, iota_ell_prior, utm);
}
}