Hello - I am having a hard time understanding how to calculate the log_lik (for use in loo) for an occupancy-style analysis (similar to those in Kery and Schaub’s Bayesian Population Analysis, translated into Stan here). I have looked at the example from capture-recapture models (described here), but I’m afraid I don’t quite follow how the user-defined function works and it seems more complex than necessary (i.e., for simpler models this has been a straightforward addition to the generated quantities block). My model (included below) is basically a single-season occupancy model with three levels of geographically nested varying intercepts and associated predictors for psi and a single level model for p. Any advice on how estimate log_lik correctly or an intuition for how this works would be much appreciated.
Thanks!
Model:
data {
int<lower=1> S; //number of states
int<lower=1> C; //number of counties
int<lower=1> T; //number of tracts
int<lower=1> B; //number of block groups
int K; //number of tract predictors
int J; //number of county predictors
//LookupID
int <lower=1, upper=S> CtyInSt[C]; //map cty to state
int <lower=1, upper=C> TctInCty[T]; // map tract to cty
//outcome
int<lower=0,upper=1> y[T, B];
//predictors
//occupancy predictors
matrix[T, K] occtct;
matrix[C, J] occcty;
//detection predictor
matrix[T, B] bginc;
matrix[T, B] bgpop;
matrix[T, B] bgAg;
int last[T];
}
transformed data {
int<lower=0,upper=B> sum_y[T];
int<lower=0,upper=T> occ_obs; // Number of observed occupied sites
occ_obs = 0;
for (i in 1:T) {
sum_y[i] = sum(y[i]);
if (sum_y[i])
occ_obs = occ_obs + 1;
}
}
parameters{
real mu_alpha;
vector[S] alpha_st_tilde;
vector[C] alpha_cty_tilde;
real<lower=0> sigma_alpha;
vector<lower=0> [S] sigma_st;
vector<lower=0>[C] sigma_cty;
vector[J] beta_cty;
vector[K] beta_tct;
real alpha_p;
real beta1_p;
real beta2_p;
real beta3_p;
}
transformed parameters{
vector[S] alpha_occ_st = mu_alpha + sigma_alpha * alpha_st_tilde;
vector[C] alpha_occ_cty = alpha_occ_st[CtyInSt] + occcty * beta_cty + sigma_st[CtyInSt] .* alpha_cty_tilde[CtyInSt];
vector[T] logit_psi; // Logit occupancy prob.
matrix[T, B] logit_p; // Logit detection prob.
for (i in 1:T)
logit_psi[i] = alpha_occ_cty[TctInCty[i]] + occtct[i,1:K] * beta_tct ;
logit_p = alpha_p
+ beta1_p * bginc + beta2_p * bgpop
+ beta3_p * bgAg;
}
model {
// Priors
mu_alpha ~ normal(0,5);
alpha_st_tilde ~ normal(0,1);
alpha_cty_tilde ~ normal(0,1);
sigma_alpha ~ normal(0,1);
sigma_st ~ normal(0,1);
sigma_cty ~ normal(0,1);
beta_cty ~ cauchy(0,2.5);
beta_tct ~ cauchy(0,2.5);
alpha_p ~ normal(0,5);
beta1_p ~ cauchy(0,2.5);
beta2_p ~ cauchy(0,2.5);
beta3_p ~ cauchy(0,2.5);
// Likelihood
for (i in 1:T) {
if (sum_y[i]) { // Occupied and observed
target += bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(y[i, 1:last[i]] | logit_p[i, 1:last[i]]);
} else { // Never observed
// Occupied and not observed
target += log_sum_exp(bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(0 | logit_p[i, 1:last[i]]),
// Not occupied
bernoulli_logit_lpmf(0 | logit_psi[i]));
}
}
}
generated quantities {
real<lower=0,upper=1> mean_p = inv_logit(alpha_p);
int occ_fs; // Number of occupied sites
real psi_con[T]; // prob present | data
int z[T]; // occupancy indicator, 0/1
for (i in 1:T) {
if (sum_y[i] == 0) { // species not detected
real psi = inv_logit(logit_psi[i]);
vector[last[i]] q = inv_logit(-logit_p[i, 1:last[i]])'; // q = 1 - p
real qT = prod(q[]);
psi_con[i] = (psi * qT) / (psi * qT + (1 - psi));
z[i] = bernoulli_rng(psi_con[i]);
} else { // species detected at least once
psi_con[i] = 1;
z[i] = 1;
}
}
occ_fs = sum(z);
}
Looks like you have everything you need here to compute the log likelihood:
If you want a vector log_lik with T elements, you can store the log likelihood values in the transformed parameters block, and increment the log density in the model block as follows:
...
transformed parameters{
...
vector[T] log_lik;
for (i in 1:T) {
if (sum_y[i]) {
// observed, present
log_lik[i] = bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(y[i, 1:last[i]] | logit_p[i, 1:last[i]]);
} else {
// not observed
log_lik[i] = log_sum_exp( // either:
// present and not detected
bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(0 | logit_p[i, 1:last[i]]),
// or absent
bernoulli_logit_lpmf(0 | logit_psi[i]));
}
}
...
}
model {
...
// increment the log density
target += sum(log_lik);
...
}
...