I have a working occupancy model that uses the Onion method for a correlated error structure.
The model runs well, but I would like it to be faster and cleaner if possible. Does anybody have suggestions for how to speed up this model, or for how to move repeated parts of the code into user-defined functions?
data {
// --- sampling unit-level occupancy (psi) covariates ---
int<lower = 1> n_unit; // number of sampling units (sites)
int<lower = 1> n_psi; // number of rows in the occupancy design matrix
int<lower = 1> m_psi; // number of occupancy covariates (columns of x_psi)
matrix[n_psi, m_psi] x_psi; // occupancy design matrix
int<lower = 1> jj_psi[n_psi]; // group id for psi-level; indexes rows of beta_psi (values in 1:n_unit)
// --- sampling unit-level hyper-parameters ---
int<lower = 1> n_psi_star; // rows of hyper-level occupancy design matrix; NOTE: must equal n_unit for the beta_psi construction in transformed parameters
int<lower = 1> m_psi_star; // columns of hyper-level occupancy design matrix
matrix[n_psi_star, m_psi_star] x_psi_star; // hyper-level design matrix for beta_psi
matrix[m_psi_star, m_psi] beta_psi_star_prior; // prior means for beta_psi_star
real<lower=0> beta_psi_star_sd_prior; // prior SD for beta_psi_star
real<lower=0> eta_psi; // concentration of the onion (LKJ) prior on the occupancy Cholesky correlation matrix
// --- sample-level detection (p) covariates ---
int<lower = 1> total_observations; // total number of surveys across all units
int<lower = 1> m_p; // number of detection covariates (columns of v_p)
matrix[total_observations, m_p] v_p; // detection design matrix
int<lower = 1> jj_p[total_observations]; // group id for p-level; indexes rows of delta_p (values in 1:n_unit)
// --- sample-level hyper-parameters ---
int<lower = 1> n_p_star; // rows of hyper-level detection design matrix; presumably equals n_unit like n_psi_star — verify against caller
int<lower = 1> m_p_star; // columns of hyper-level detection design matrix
matrix[n_p_star, m_p_star] v_p_star; // hyper-level design matrix for delta_p
// --- survey-level information ---
int<lower = 0> y[total_observations]; // detection outcomes; fed to bernoulli_logit_lpmf, so effectively 0/1
int<lower = 0, upper = total_observations> start_idx[n_psi]; // first survey index for each psi-level row
int<lower = 0, upper = total_observations> end_idx[n_psi]; // last survey index for each psi-level row
// summary of whether species is known to be present at each unit
int<lower = 0, upper = 1> any_seen[n_psi]; // 1 = at least one detection at the unit
// number of surveys at each unit
int<lower = 0> n_samples[n_psi]; // 0 means this row contributes no detection data
matrix[m_p_star, m_p] delta_p_star_prior; // prior means for delta_p_star
real<lower=0> delta_p_star_sd_prior; // prior SD for delta_p_star
real<lower=0> eta_p; // concentration of the onion (LKJ) prior on the detection Cholesky correlation matrix
}
parameters {
matrix[m_psi, n_unit] z_psi; // standardized noise for the non-centered occupancy coefficients
matrix[m_p, n_unit] z_p; // standardized noise for the non-centered detection coefficients
matrix[m_psi_star, m_psi] beta_psi_star; // hyper-level occupancy coefficients
real<lower=0> sigma_psi; // SD of logit_psi around its linear predictor (no explicit prior; implicit flat on [0, inf))
vector<lower=0, upper= pi()/2>[m_psi] tau_psi_unif; // uniform angles; mapped to half-Cauchy(0, 2.5) scales via tan() in transformed parameters
vector[n_psi] logit_psi; // unit-level occupancy probability on the logit scale
row_vector[choose(m_psi, 2) - 1] l_omega_psi; // raw onion-method terms; do NOT init with 0 for all elements (dot_self would be 0)
vector<lower = 0,upper = 1>[m_psi - 1] r_2_psi; // onion R^2 terms; first element is not really a R^2 but is on (0,1)
matrix[m_p_star, m_p] delta_p_star; // hyper-level detection coefficients
real<lower=0> sigma_p; // SD of logit_p around its linear predictor (no explicit prior; implicit flat on [0, inf))
vector<lower=0, upper= pi()/2>[m_p] tau_p_unif; // uniform angles; mapped to half-Cauchy(0, 2.5) scales via tan() in transformed parameters
vector[total_observations] logit_p; // survey-level detection probability on the logit scale
row_vector[choose(m_p, 2) - 1] l_omega_p; // raw onion-method terms; do NOT init with 0 for all elements (dot_self would be 0)
vector<lower = 0,upper = 1>[m_p - 1] r_2_p; // onion R^2 terms; first element is not really a R^2 but is on (0,1)
}
transformed parameters {
// Running shape parameters for the onion-method beta distributions.
// NOTE: these are decremented inside the shape loops below, so their
// saved values are the post-loop values, not the initial ones.
real<lower = 0> alpha_p = eta_p + (m_p - 2) / 2.0;
vector<lower = 0>[m_p-1] shape_1_p;
vector<lower = 0>[m_p-1] shape_2_p;
real<lower = 0> alpha_psi = eta_psi + (m_psi - 2) / 2.0;
vector<lower = 0>[m_psi-1] shape_1_psi;
vector<lower = 0>[m_psi-1] shape_2_psi;
vector<lower=0>[m_psi] tau_psi; // prior scale
matrix[n_unit, m_psi] beta_psi; // unit-level occupancy coefficients
vector<lower=0>[m_p] tau_p; // prior scale
matrix[n_unit, m_p] delta_p; // unit-level detection coefficients
matrix[m_p, m_p] L_Omega_p = rep_matrix(0, m_p, m_p); // Cholesky factor of detection corr matrix
matrix[m_psi, m_psi] L_Omega_psi = rep_matrix(0, m_psi, m_psi); // Cholesky factor of occupancy corr matrix
// Onion-method build of L_Omega_p.  Row k+1 consumes a length-k segment
// of l_omega_p; segments must be disjoint and consecutive, i.e. start at
// indices 1, 3, 6, 10, ...  The segment for the next row (length k+1)
// therefore ends at start_new + k.
// BUG FIX: the original advanced `end` with `end = start + k - 1;`, which
// made consecutive segments overlap for m_p >= 5 and left the trailing
// elements of l_omega_p unused (sampled only from their prior), silently
// corrupting the correlation prior.
{
int start = 1;
int end = 2; // last index of the k = 2 segment
L_Omega_p[1,1] = 1.0;
L_Omega_p[2,1] = 2.0 * r_2_p[1] - 1.0;
L_Omega_p[2,2] = sqrt(1.0 - square(L_Omega_p[2,1]));
for(k in 2:(m_p - 1)) {
int kp1 = k + 1;
row_vector[k] l_row = segment(l_omega_p, start, k);
real scale = sqrt(r_2_p[k] / dot_self(l_row));
L_Omega_p[kp1, 1:k] = l_row * scale;
L_Omega_p[kp1, kp1] = sqrt(1.0 - r_2_p[k]);
start = end + 1;
end = start + k; // FIX: next segment has length k + 1 (was `start + k - 1`)
}
}
// Beta shapes for r_2_p.  These depend only on data (eta_p, m_p) and
// could be moved to transformed data to avoid recomputation at every
// leapfrog step -- a free speedup for this model.
shape_1_p[1] = alpha_p;
shape_2_p[1] = alpha_p;
for(k in 2:(m_p-1)) {
alpha_p -= 0.5;
shape_1_p[k] = k / 2.0;
shape_2_p[k] = alpha_p;
}
// Same onion-method build for the occupancy correlation matrix,
// including the same off-by-one fix in the segment bookkeeping.
{
int start = 1;
int end = 2; // last index of the k = 2 segment
L_Omega_psi[1,1] = 1.0;
L_Omega_psi[2,1] = 2.0 * r_2_psi[1] - 1.0;
L_Omega_psi[2,2] = sqrt(1.0 - square(L_Omega_psi[2,1]));
for(k in 2:(m_psi - 1)) {
int kp1 = k + 1;
row_vector[k] l_row = segment(l_omega_psi, start, k);
real scale = sqrt(r_2_psi[k] / dot_self(l_row));
L_Omega_psi[kp1, 1:k] = l_row * scale;
L_Omega_psi[kp1, kp1] = sqrt(1.0 - r_2_psi[k]);
start = end + 1;
end = start + k; // FIX: next segment has length k + 1 (was `start + k - 1`)
}
}
// Beta shapes for r_2_psi (also data-only; transformed data candidate).
shape_1_psi[1] = alpha_psi;
shape_2_psi[1] = alpha_psi;
for(k in 2:(m_psi-1)) {
alpha_psi -= 0.5;
shape_1_psi[k] = k / 2.0;
shape_2_psi[k] = alpha_psi;
}
// Half-Cauchy(0, 2.5) scales via tan() of a uniform angle; tan() is
// vectorized in Stan, so the element-wise loops are unnecessary.
tau_psi = 2.5 * tan(tau_psi_unif);
tau_p = 2.5 * tan(tau_p_unif);
// Non-centered parameterization: hyper-level mean plus correlated,
// scaled standard-normal noise.
beta_psi = x_psi_star * beta_psi_star + (diag_pre_multiply(tau_psi, L_Omega_psi) * z_psi)';
delta_p = v_p_star * delta_p_star + (diag_pre_multiply(tau_p, L_Omega_p) * z_p)';
}
model {
// Pre-compute log(psi) and log(1 - psi) once for the marginalized likelihood.
vector[n_psi] log_psi = log_inv_logit(logit_psi);
vector[n_psi] log1m_psi = log1m_inv_logit(logit_psi);
// --- occupancy-level priors and hierarchy ---
l_omega_psi ~ std_normal();
r_2_psi ~ beta(shape_1_psi, shape_2_psi);
to_vector(beta_psi_star) ~ normal(to_vector(beta_psi_star_prior), beta_psi_star_sd_prior);
to_vector(z_psi) ~ std_normal();
logit_psi ~ normal(rows_dot_product(beta_psi[jj_psi], x_psi), sigma_psi);
// --- detection-level priors and hierarchy ---
l_omega_p ~ std_normal();
r_2_p ~ beta(shape_1_p, shape_2_p);
to_vector(delta_p_star) ~ normal(to_vector(delta_p_star_prior), delta_p_star_sd_prior);
to_vector(z_p) ~ std_normal();
logit_p ~ normal(rows_dot_product(delta_p[jj_p], v_p), sigma_p);
// NOTE(review): sigma_psi and sigma_p carry no explicit prior here
// (implicit flat on their <lower=0> support) -- confirm that is intended.
// Marginalized occupancy likelihood: occupancy state z is summed out.
for (idx in 1:n_psi) {
if (n_samples[idx] > 0) {
int s = start_idx[idx];
int e = end_idx[idx];
// Detection log-likelihood for this unit's surveys, shared by both branches.
real lp_detect = bernoulli_logit_lpmf(y[s:e] | logit_p[s:e]);
if (any_seen[idx]) {
// Species was detected, so the unit is certainly occupied.
target += log_psi[idx] + lp_detect;
} else {
// Never detected: either occupied-and-missed, or unoccupied.
target += log_sum_exp(log_psi[idx] + lp_detect, log1m_psi[idx]);
}
}
}
}
generated quantities {
// Recover the full correlation matrices from their Cholesky factors.
matrix[m_psi, m_psi] Omega_psi = multiply_lower_tri_self_transpose(L_Omega_psi);
matrix[m_p, m_p] Omega_p = multiply_lower_tri_self_transpose(L_Omega_p);
}
Lastly, the model currently lives on a USGS GitLab instance, in the UMESC / quant-ecology / occStan project (branch `corr`, file `inst/stan/occupancy_2_corr.stan`, hosted on usgs.gov). Hopefully USGS will soon make it easier for us to use GitHub.com; I am sorry I cannot easily host the code there at the moment.