Calculating log_lik for occupancy (mixture) models

Hello - I am having a hard time understanding how to calculate the `log_lik` (for use in `loo`) for an occupancy-style analysis (similar to those in Kery and Schaub’s Bayesian Population Analysis, translated into Stan here). I have looked at the example from capture-recapture models (described here), but I’m afraid I don’t quite follow how the user-defined function works and it seems more complex than necessary (i.e., for simpler models this has been a straightforward addition to the `generated quantities` block). My model (included below) is basically a single-season occupancy model with three levels of geographically nested varying intercepts and associated predictors for psi and a single level model for p. Any advice on how estimate `log_lik` correctly or an intuition for how this works would be much appreciated.
Thanks!

Model:

``````data {
int<lower=1> S; //number of states
int<lower=1> C; //number of counties
int<lower=1> T; //number of tracts
int<lower=1> B; //number of block groups
int K; //number of tract predictors
int J; //number of county predictors

//LookupID
int <lower=1, upper=S> CtyInSt[C]; //map cty to state
int <lower=1, upper=C> TctInCty[T]; // map tract to cty

//outcome
int<lower=0,upper=1> y[T, B];
//predictors
//occupancy predictors
matrix[T, K] occtct;
matrix[C, J] occcty;
//detection predictor
matrix[T, B] bginc;
matrix[T, B] bgpop;
matrix[T, B] bgAg;

int last[T];
}
transformed data {
int<lower=0,upper=B> sum_y[T];
int<lower=0,upper=T> occ_obs;  // Number of observed occupied sites

occ_obs = 0;
for (i in 1:T) {
sum_y[i] = sum(y[i]);
if (sum_y[i])
occ_obs = occ_obs + 1;
}
}
parameters{
real mu_alpha;
vector[S] alpha_st_tilde;
vector[C] alpha_cty_tilde;
real<lower=0> sigma_alpha;
vector<lower=0> [S] sigma_st;
vector<lower=0>[C] sigma_cty;

vector[J] beta_cty;
vector[K] beta_tct;
real alpha_p;
real beta1_p;
real beta2_p;
real beta3_p;

}
transformed parameters{
vector[S] alpha_occ_st = mu_alpha + sigma_alpha * alpha_st_tilde;
vector[C] alpha_occ_cty = alpha_occ_st[CtyInSt] + occcty * beta_cty + sigma_st[CtyInSt] .* alpha_cty_tilde[CtyInSt];

vector[T] logit_psi;  // Logit occupancy prob.
matrix[T, B] logit_p; // Logit detection prob.

for (i in 1:T)
logit_psi[i] = alpha_occ_cty[TctInCty[i]]  + occtct[i,1:K] * beta_tct  ;
logit_p = alpha_p
+ beta1_p * bginc + beta2_p * bgpop
+ beta3_p * bgAg;
}
model {
// Priors
mu_alpha ~ normal(0,5);
alpha_st_tilde ~ normal(0,1);
alpha_cty_tilde ~ normal(0,1);
sigma_alpha ~ normal(0,1);
sigma_st ~ normal(0,1);
sigma_cty ~ normal(0,1);
beta_cty ~ cauchy(0,2.5);
beta_tct ~ cauchy(0,2.5);
alpha_p ~ normal(0,5);
beta1_p ~ cauchy(0,2.5);
beta2_p ~ cauchy(0,2.5);
beta3_p ~ cauchy(0,2.5);

// Likelihood
for (i in 1:T) {
if (sum_y[i]) { // Occupied and observed
target += bernoulli_logit_lpmf(1 |  logit_psi[i])
+ bernoulli_logit_lpmf(y[i, 1:last[i]] | logit_p[i, 1:last[i]]);
} else {        // Never observed
// Occupied and not observed
target += log_sum_exp(bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(0 | logit_p[i, 1:last[i]]),
// Not occupied
bernoulli_logit_lpmf(0 | logit_psi[i]));
}
}
}
generated quantities {
real<lower=0,upper=1> mean_p = inv_logit(alpha_p);
int occ_fs;       // Number of occupied sites
real psi_con[T];  // prob present | data
int z[T];         // occupancy indicator, 0/1

for (i in 1:T) {
if (sum_y[i] == 0) {  // species not detected
real psi = inv_logit(logit_psi[i]);
vector[last[i]] q = inv_logit(-logit_p[i, 1:last[i]])';  // q = 1 - p
real qT = prod(q[]);
psi_con[i] = (psi * qT) / (psi * qT + (1 - psi));
z[i] = bernoulli_rng(psi_con[i]);
} else {             // species detected at least once
psi_con[i] = 1;
z[i] = 1;
}
}
occ_fs = sum(z);
}``````

Looks like you have everything you need here to compute the log likelihood:

If you want a vector `log_lik` with `T` elements, you can store the log likelihood values in the transformed parameters block, and increment the log density in the model block as follows:

``````...
transformed parameters{
...
vector[T] log_lik;

for (i in 1:T) {
if (sum_y[i]) {
// observed, present
log_lik[i] = bernoulli_logit_lpmf(1 |  logit_psi[i])
+ bernoulli_logit_lpmf(y[i, 1:last[i]] | logit_p[i, 1:last[i]]);
} else {
// not observed
log_lik[i] = log_sum_exp( // either:
// present and not detected
bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(0 | logit_p[i, 1:last[i]]),
// or absent
bernoulli_logit_lpmf(0 | logit_psi[i]));
}
}
...
}
model {
...
// increment the log density
target += sum(log_lik);
...
}
...
``````

Thanks @mbjoseph - just to clarify, do I add this to my existing model, or does it replace something in the model?

It would replace the current likelihood section in your model, so that the final model statement is:

``````data {
int<lower=1> S; //number of states
int<lower=1> C; //number of counties
int<lower=1> T; //number of tracts
int<lower=1> B; //number of block groups
int K; //number of tract predictors
int J; //number of county predictors

//LookupID
int <lower=1, upper=S> CtyInSt[C]; //map cty to state
int <lower=1, upper=C> TctInCty[T]; // map tract to cty

//outcome
int<lower=0,upper=1> y[T, B];

//predictors
//occupancy predictors
matrix[T, K] occtct;
matrix[C, J] occcty;
//detection predictor
matrix[T, B] bginc;
matrix[T, B] bgpop;
matrix[T, B] bgAg;

int last[T];
}

transformed data {
int<lower=0,upper=B> sum_y[T];
int<lower=0,upper=T> occ_obs;  // Number of observed occupied sites

occ_obs = 0;
for (i in 1:T) {
sum_y[i] = sum(y[i]);
if (sum_y[i])
occ_obs = occ_obs + 1;
}
}

parameters{
real mu_alpha;
vector[S] alpha_st_tilde;
vector[C] alpha_cty_tilde;
real<lower=0> sigma_alpha;
vector<lower=0> [S] sigma_st;
vector<lower=0>[C] sigma_cty;

vector[J] beta_cty;
vector[K] beta_tct;
real alpha_p;
real beta1_p;
real beta2_p;
real beta3_p;
}

transformed parameters{
vector[S] alpha_occ_st = mu_alpha + sigma_alpha * alpha_st_tilde;
vector[C] alpha_occ_cty = alpha_occ_st[CtyInSt] + occcty * beta_cty + sigma_st[CtyInSt] .* alpha_cty_tilde[CtyInSt];

vector[T] logit_psi;  // Logit occupancy prob.
matrix[T, B] logit_p; // Logit detection prob.
vector[T] log_lik;

for (i in 1:T)
logit_psi[i] = alpha_occ_cty[TctInCty[i]]  + occtct[i,1:K] * beta_tct;

logit_p = alpha_p
+ beta1_p * bginc + beta2_p * bgpop
+ beta3_p * bgAg;

for (i in 1:T) {
if (sum_y[i]) {
// observed, present
log_lik[i] = bernoulli_logit_lpmf(1 |  logit_psi[i])
+ bernoulli_logit_lpmf(y[i, 1:last[i]] | logit_p[i, 1:last[i]]);
} else {
// not observed
log_lik[i] = log_sum_exp( // either:
// present and not detected
bernoulli_logit_lpmf(1 | logit_psi[i])
+ bernoulli_logit_lpmf(0 | logit_p[i, 1:last[i]]),
// or absent
bernoulli_logit_lpmf(0 | logit_psi[i]));
}
}
}

model {
// Priors
mu_alpha ~ normal(0,5);
alpha_st_tilde ~ normal(0,1);
alpha_cty_tilde ~ normal(0,1);
sigma_alpha ~ normal(0,1);
sigma_st ~ normal(0,1);
sigma_cty ~ normal(0,1);
beta_cty ~ cauchy(0,2.5);
beta_tct ~ cauchy(0,2.5);
alpha_p ~ normal(0,5);
beta1_p ~ cauchy(0,2.5);
beta2_p ~ cauchy(0,2.5);
beta3_p ~ cauchy(0,2.5);

// increment the log density with the likelihood
target += sum(log_lik);
}

generated quantities {
real<lower=0,upper=1> mean_p = inv_logit(alpha_p);
int occ_fs;       // Number of occupied sites
real psi_con[T];  // prob present | data
int z[T];         // occupancy indicator, 0/1

for (i in 1:T) {
if (sum_y[i] == 0) {  // species not detected
real psi = inv_logit(logit_psi[i]);
vector[last[i]] q = inv_logit(-logit_p[i, 1:last[i]])';  // q = 1 - p
real qT = prod(q[]);
psi_con[i] = (psi * qT) / (psi * qT + (1 - psi));
z[i] = bernoulli_rng(psi_con[i]);
} else {             // species detected at least once
psi_con[i] = 1;
z[i] = 1;
}
}
occ_fs = sum(z);
}
``````
1 Like

Fantastic! I thought I had everything I needed, I just couldn’t figure out how to put the statements together. Really appreciate your help.