Hello,
I am trying to fit an occupancy model (based on @mbjoseph’s gist) that includes a Conditional Autoregressive component (based on @mbjoseph’s sparse CAR implementation case study). Sampling results in the “low E-BFMI” warning and n_eff
values less than 10 for tau
(the precision parameter in the CAR component) and lp_
when fitting to simulated data. If I use a strongly informative prior (e.g., a normal distribution with mu
= the true value and sd
= 0.1), I can get the model to sample and it returns the correct regression parameter estimates. If I use a new value for tau
the model still samples, but returns an incorrect value for tau
and the parameter estimates for the regression parameters are badly biased. I would like to fit these models to actual data wherein I will not know the true value of tau
, and so likely(?) need to reparamaterize the CAR component of the model. Any assistance would be most appreciated.
Generate simulated data
# Load packages -----------------------------------------------------------
library(tidyverse);
library(spdep);
library(rgdal)
library(rstan);
library(sf);
library(tigris);
options(mc.cores = 3);
rstan_options(auto_write = TRUE)
options(tigris_use_cache = TRUE)
prj <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"
##Function to simulate spatial autocorrelation in the data
gen_CAR <- function(lyr, a, tau){
lyr.ord <- lyr[order(lyr$GEOID),] %>% as(., "Spatial"); #make sure that layer is ordered by GEOID
lyr.nb <- poly2nb(lyr.ord, row.names = lyr.ord$GEOID); #estimate neighbors could use st_touches on sf, but slower
lyr.W <- nb2mat(lyr.nb, style="B"); #convert neighborhood list into matrix
lyr.D <- diag(rowSums(lyr.W)); # get the diagonal
prec.mat <- tau*(lyr.D - a*lyr.W); #estimate precision matrix
cov.mat <- solve(prec.mat); #invert precision matrix necesarry because tau is the reciprocal of variance
set.seed(1234)
a.lyr.CAR <- MASS::mvrnorm(1, rep(0.5, nrow(cov.mat)), cov.mat) #generate from normal distr with spatial cov matrix
lyr.CAR <- lyr.ord %>% as(.,"sf") %>% cbind(., a.lyr.CAR) #bind to original spatial data frame
return(list(lyr.W, lyr.D, cov.mat, lyr.CAR))
}
# Get a dummy geography ---------------------------------------------------
st <- "IA"
geog.tct <- tracts(st) %>% as(., "sf") %>% st_transform(., crs= prj)
geog.bg <- block_groups(st) %>% as(., "sf") %>% st_transform(., crs= prj)
tct.car <- gen_CAR(geog.tct, 0.8, 2)
tct.df <- tct.car[[4]]
# Simulate true occupancy states ------------------------------------------
# define a design matrix for site-level occupancy
n_site <- nrow(tct.df)
m_psi <- 3 # m_psi is the number of columns in the site level design matrix
X_psi <- matrix(c(rep(1, n_site), rnorm((m_psi - 1) * n_site)),
ncol = m_psi
stopifnot(ncol(X_psi) >= 1)
# get occupancy probabilities
beta_psi <- rnorm(m_psi)
psi <- plogis(X_psi %*% beta_psi + as.vector(tct.df$a.lyr.CAR))
z <- rbinom(n_site, size = 1, prob = psi)
tct.df <- cbind(tct.df, z)
# Simulate imperfect detection/nondetection data --------------------------
# determine number of surveys per site
survey_summary <- geog.bg %>%
group_by(., STATEFP, COUNTYFP, TRACTCE) %>%
summarise(count = n()) #NOTE: This orders the data; fine if CAR function also orders, but could screw things up
n_survey <- as.vector(survey_summary$count)
total_surveys <- nrow(geog.bg)
# define a survey-level design matrix for detection probabilities
m_p <- 2 # m_p is the number of survey-level covariates
beta_p <- rnorm(m_p)
X_p <- matrix(c(rep(1, total_surveys), rnorm((m_p - 1) * total_surveys)),
ncol = m_p)
stopifnot(ncol(X_p) >= 1)
p <- plogis(X_p %*% beta_p)
# simulate observations
survey_df <- tibble(site = rep(1:n_site, n_survey),
siteID = rep(tct.df$GEOID, n_survey)) %>%
mutate(y = rbinom(n = total_surveys, size = 1, prob = z[site] * p))
# get start and end indices to extract slices of y for each site
start_idx <- rep(0, n_site)
end_idx <- rep(0, n_site)
for (i in 1:n_site) {
if (n_survey[i] > 0) {
site_indices <- which(survey_df$site == i)
start_idx[i] <- site_indices[1]
end_idx[i] <- site_indices[n_survey[i]]
}
}
# create vector of whether any positive observations were seen at each site
any_seen <- rep(0, n_site)
for (i in 1:n_site) {
if (n_survey[i] > 0) {
any_seen[i] <- max(survey_df$y[start_idx[i]:end_idx[i]])
}
}
# Bundle data for Stan ----------------------------------------------------
stan_d <- list(n_site = n_site,
m_psi = m_psi,
X_psi = X_psi,
total_surveys = total_surveys,
m_p = m_p,
X_p = X_p,
site = survey_df$site,
y = survey_df$y,
start_idx = start_idx,
end_idx = end_idx,
any_seen = any_seen,
n_survey = n_survey,
W_tct = tct.car[[1]],
W_n = sum(tct.car[[1]])/2)
Stan Model
functions {
/**
* Return the log probability of a proper conditional autoregressive (CAR) prior
* with a sparse representation for the adjacency matrix
*
* @param phi Vector containing the parameters with a CAR prior
* @param tau Precision parameter for the CAR prior (real)
* @param alpha Dependence (usually spatial) parameter for the CAR prior (real)
* @param W_sparse Sparse representation of adjacency matrix (int array)
* @param n Length of phi (int)
* @param W_n Number of adjacent pairs (int)
* @param D_sparse Number of neighbors for each location (vector)
* @param lambda Eigenvalues of D^{-1/2}*W*D^{-1/2} (vector)
*
* @return Log probability density of CAR prior up to additive constant
*/
real sparse_car_lpdf(vector phi, real tau, real alpha,
int[,] W_sparse, vector D_sparse, vector lambda, int n_site, int W_n) {
row_vector[n_site] phit_D; // phi' * D
row_vector[n_site] phit_W; // phi' * W
vector[n_site] ldet_terms;
phit_D = (phi .* D_sparse)';
phit_W = rep_row_vector(0, n_site);
for (i in 1:W_n) {
phit_W[W_sparse[i, 1]] = phit_W[W_sparse[i, 1]] + phi[W_sparse[i, 2]];
phit_W[W_sparse[i, 2]] = phit_W[W_sparse[i, 2]] + phi[W_sparse[i, 1]];
}
for (i in 1:n_site) ldet_terms[i] = log1m(alpha * lambda[i]);
return 0.5 * (n_site * log(tau)
+ sum(ldet_terms)
- tau * (phit_D * phi - alpha * (phit_W * phi)));
}
}
data {
// site-level occupancy covariates
int<lower = 1> n_site;
int<lower = 1> m_psi;
matrix[n_site, m_psi] X_psi;
// survey-level detection covariates
int<lower = 1> total_surveys;
int<lower = 1> m_p;
matrix[total_surveys, m_p] X_p;
// survey level information
int<lower = 1, upper = n_site> site[total_surveys];
int<lower = 0, upper = 1> y[total_surveys];
int<lower = 0, upper = total_surveys> start_idx[n_site];
int<lower = 0, upper = total_surveys> end_idx[n_site];
// summary of whether species is known to be present at each site
int<lower = 0, upper = 1> any_seen[n_site];
// number of surveys at each site
int<lower = 0> n_survey[n_site];
//adjacency data
matrix<lower = 0, upper = 1>[n_site, n_site] W_tct; //adjacency matrix tract
int W_n; //number of adjacent pairs
//real<lower = 0> tau;
}
transformed data {
int W_sparse[W_n, 2]; // adjacency pairs
vector[n_site] D_sparse; // diagonal of D (number of neigbors for each site)
vector[n_site] lambda; // eigenvalues of invsqrtD * W * invsqrtD
{ // generate sparse representation for W
int counter;
counter = 1;
// loop over upper triangular part of W to identify neighbor pairs
for (i in 1:(n_site - 1)) {
for (j in (i + 1):n_site) {
if (W_tct[i, j] == 1) {
W_sparse[counter, 1] = i;
W_sparse[counter, 2] = j;
counter = counter + 1;
}
}
}
}
for (i in 1:n_site) D_sparse[i] = sum(W_tct[i]);
{
vector[n_site] invsqrtD;
for (i in 1:n_site) {
invsqrtD[i] = 1 / sqrt(D_sparse[i]);
}
lambda = eigenvalues_sym(quad_form(W_tct, diag_matrix(invsqrtD)));
}
}
parameters {
vector[n_site] phi;
//real<lower = 0, upper = 1> alpha;
vector[m_psi] beta_psi;
vector[m_p] beta_p;
real<lower = 0, upper =1> alpha_tilde;
real<lower = 0> sigma;
}
transformed parameters {
vector[total_surveys] logit_p = X_p * beta_p;
vector[n_site] logit_psi = X_psi * beta_psi + phi;
real<lower = 0, upper =1> alpha = (1+alpha_tilde)/2;
real<lower = 0> tau = 1/sigma;
}
model {
vector[n_site] log_psi = log_inv_logit(logit_psi);
vector[n_site] log1m_psi = log1m_inv_logit(logit_psi);
phi ~ sparse_car(tau, alpha, W_sparse, D_sparse, lambda, n_site, W_n);
alpha_tilde ~ beta(2, 2);
sigma ~ normal(0.1,1);
beta_psi ~ normal(0, 1);
beta_p ~ normal(0, 1);
for (i in 1:n_site) {
if (n_survey[i] > 0) {
if (any_seen[i]) {
// site is occupied
target += log_psi[i]
+ bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] |
logit_p[start_idx[i]:end_idx[i]]);
} else {
// site may or may not be occupied
target += log_sum_exp(
log_psi[i] + bernoulli_logit_lpmf(y[start_idx[i]:end_idx[i]] |
logit_p[start_idx[i]:end_idx[i]]),
log1m_psi[i]
);
}
}
}
}
I have tried sampling tau
directly (rather than as the reciprocal of sigma
), but that doesn’t seem to help with the problem.