Hello everyone,
Context
I’m attempting to build on Bob Carpenter’s implementation of Dorazio et al’s (2006) multi-species occupancy model. I would like to swap the latent predictors influencing detection and occupancy, for some site- and landscape-level predictors.
Background
I have records for 56 bird species at 101 sites, each site was surveyed 3 times in a single season. I have 16 site and landscape predictors, some linear, some interactions, that will likely influence occupancy. These are the same predictors used in a previous multi-species occupancy model that did not include detection history. I also hope to incorporate predictors for detection, principally Julian day of the survey. The only way I can conceive of doing this is by converting the Species x Site matrix used in the MSOM to a vector of presence-absence, with one row per species per survey.
In the model linked to my previous forum post above, @andrjohns gave some invaluable assistance, converting the multiple predictors into a single matrix and using non-centred parameterisation. I hoped to combine the MSOM model and the No-detection-history model, but I can’t get the code to work. I’m not sure how to combine the matrix of predictors with logit_psi
; I’m getting a dimension mismatch. The MSOM model defines this as a vector, but I suspect it needs to be a matrix? Or I need to combine all the individual predictors into a single value somehow?
I don’t have enough Stan experience to work out where I’m going wrong. If anyone is able to help me resolve this, or notices any other errors, I would really appreciate some help, thanks.
Code
// MSOM
// Adapted from https://mc-stan.org/users/documentation/case-studies/dorazio-royle-occupancy.html
// and https://discourse.mc-stan.org/t/multi-species-occupancy-no-detection-history/27274
// Define some helper functions
// Mostly unchanged from Bob Carpenter's MSOM, except
// 'lp_observed' has 'Occ' swapped for 'x', and 'bernoulli_lpmf'
// in place of binomial to account for only single trial.
functions {
matrix cov_matrix_2d(vector sigma, real rho) {
matrix[2,2] Sigma;
Sigma[1,1] = square(sigma[1]);
Sigma[2,2] = square(sigma[2]);
Sigma[1,2] = sigma[1] * sigma[2] * rho;
Sigma[2,1] = Sigma[1,2];
return Sigma;
}
real lp_observed(int Occ, int K, real logit_psi, real logit_theta) {
return log_inv_logit(logit_psi)
+ bernoulli_lpmf(Occ, logit_theta);
}
// Do I need to alter this as well? K no longer appropriate? Only 1 trial not 3.
real lp_unobserved(int K, real logit_psi, real logit_theta) {
return log_sum_exp(lp_observed(0, K, logit_psi, logit_theta),
log1m_inv_logit(logit_psi));
}
// And change K here as well?
real lp_never_observed(int J, int K, real logit_psi, real logit_theta, real Omega) {
real lp_unavailable = bernoulli_lpmf(0 | Omega);
real lp_available = bernoulli_lpmf(1 | Omega)
+ J * lp_unobserved(K, logit_psi, logit_theta);
return log_sum_exp(lp_unavailable, lp_available);
}
}
data {
int N_sp; // Number of species (56)
int J; // Number of sites (101)
int L; // Number of occupancy predictors (16)
int<lower=1> K; // Number of surveys per site (3)
int N; // Number of rows in data (N_sp x J x K = 16968)
int sp [N]; // Vector length N of species ID codes
int<lower=0, upper=1> Occ [N]; // Vector length N of 0 - 1 occupancy
matrix[L, N] Mat; // Matrix of occupancy predictors
int<lower=N_sp> S; // Superpopulation size
int Jday [N]; // Vector length N of survey date as Julian day
}
parameters {
// These are unchanged from the MSOM
real alpha; // site-level occupancy
real beta; // site-level detection
real<lower=0, upper=1> Omega; // availability of a species
real<lower=-1,upper=1> rho_uv; // correlation of (occupancy, detection)
vector<lower=0>[2] sigma_uv; // sd of (occupancy, detection)
vector[2] uv[S]; // species-level (occupancy, detection)
// This is new, based on @andrjohn's assistance.
// Estimate species coefficients as a single matrix also
// with non-centered parameterisation.
matrix<offset=alpha, multiplier=sigma_uv[1]>[L, N_sp] alpha_slopes;
}
transformed parameters {
// New
// Combine the intercept and separate slopes into
// a single object psi
row_vector[N] psi = columns_dot_product(Mat, alpha_slopes[:,sp]);
// Unchanged
// Defines the logit-scaled versions of psi (occupancy)
// and theta (detection) as vectors.
// ** Should these be matrices? **
vector[S] logit_psi; // log odds of occurrence
vector[S] logit_theta; // log odds of detection
// Swapped single parameter 'alpha' for multiple
// predictors 'psi'
for (i in 1:S)
logit_psi[i] = uv[i,1] + psi;
// Added the Julian day slope term
for (i in 1:S)
logit_theta[i] = uv[i,2] + beta * JDay;
}
model {
// Priors. Unchanged.
alpha ~ cauchy(0, 2.5);
beta ~ cauchy(0, 2.5);
sigma_uv ~ cauchy(0, 2.5);
(rho_uv + 1) / 2 ~ beta(2, 2);
uv ~ multi_normal(rep_vector(0, 2), cov_matrix_2d(sigma_uv, rho_uv));
Omega ~ beta(2,2);
// New
// Normal prior for the combined slopes.
// Need to use `to_vector()` on `alpha_slopes` as the
// normal likelihood doesn't have a signature for matrices.
to_vector(alpha_slopes) ~ normal(alpha, sigma_uv);
// Likelihood. Unchanged
for (i in 1:N_sp) {
1 ~ bernoulli(Omega); // observed, so available
if (Occ[i] > 0)
target += lp_observed(Occ[i], K, logit_psi[i], logit_theta[i]);
else
target += lp_unobserved(K, logit_psi[i], logit_theta[i]);
}
for (i in (N_sp + 1):S)
target += lp_never_observed(J, K, logit_psi[i], logit_theta[i], Omega);
}