I am trying to run an N-mixture type model with a different parameterization than I’ve used before. The model runs and the means are reasonable but the credible intervals are enormous. I’m not sure if it’s parameterization or putting things in the wrong blocks for sampling but I suspect something with the model formulation. Any help or suggestions would be appreciated. I’m still very new to stan.
Data generation in R
# Simulate N-mixture Data
n_sites <- 100 # Number of sites
n_visits <- 4 # Number of visits per site
x1 <- rnorm(n_sites, 0, 1) # covariate on normalized scale
beta0 <- log(10) # mean abundance on log scale
beta1 <- 1.2 # effect of covariate 1
lambda <- exp(beta0 + beta1 * x1)
p <- 0.3 # static detection probability
# True abundance
N <- rep(NA, times = length(lambda))
for(i in 1:n_sites) {
N[i] <- rpois(1, lambda = lambda[i])
}
# Count process
counts <- matrix(NA, n_sites, n_visits)
for(i in 1:n_sites) {
for(j in 1:n_visits) {
counts[i, j] <- rbinom(1, N[i], p)
}
}
Stan code
// Binomial mixture model with covariates
data {
int<lower=0> R; // Number of sites
int<lower=0> T; // Number of temporal replications
int<lower=0> y[R, T]; // Counts
vector[R] X; // Covariate
}
parameters {
real beta0; // intercept (log mean abundance)
real beta1; // slope parameter
real<lower=0, upper=1> p; // detection probability
}
transformed parameters {
vector[R] log_lambda; // Log expected count
vector[R] log_mu; // Log expected population size
real log_p; // Log detection probability
vector[R] lambda; // expected count intensity
vector[R] mu; // Expected population size (rate parameter)
log_mu = beta0 + beta1 * X;
log_p = log(p);
log_lambda = log_mu + log_p;
mu = exp(log_mu);
lambda = exp(log_lambda);
}
model {
// Priors
// beta0 ~ normal(0, 10);
// beta1 ~ normal(0, 10);
beta0 ~ uniform(-10, 10);
beta1 ~ uniform(-10, 10);
p ~ uniform(0, 1);
// Likelihood
for (i in 1:R) {
for(j in 1:T) {
y[i, j] ~ poisson(lambda[i]);
// y[i, j] ~ poisson_log(log_lambda[i]);
// target += poisson_log_lpmf(y[i, j] | log_lambda);
}
}
}
generated quantities {
int N[R];
int totalN;
for (i in 1:R)
// N[i] = poisson_rng(mu[i]);
N[i] = poisson_log_rng(log_mu[i]);
totalN = sum(N);
}
Code to run via rstan
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
set.seed(123)
# load data
source("Code/Stan_Models/nmix_constant_p_data.R")
## Parameters monitored
params <- c("beta0", "beta1", "p", "N")
## MCMC settings
ni <- 20000
nt <- 10
nb <- 10000
nc <- 3
data_list <- list(counts = counts,
x1 = x1,
n_sites = nrow(counts),
n_visits = ncol(counts)
)
Nst <- apply(counts, 1, max) + 1 # Important to give good inits for latent N
## Initial values
inits <- lapply(1:nc, function(i)
list(N = Nst,
beta0 = runif(1, -1, 1),
beta1 = runif(1, -1, 1),
p = runif(1, 0.2, 0.8)))
## Call Stan from R
# if(!dir.exists("Results/Stan")) dir.create("Results/Stan", recursive = TRUE)
out <- stan("Code/Stan_Models/nmix_alt.stan",
data = list(y = counts, R = n_sites, T = n_visits, X = x1),
init = inits,
pars = params,
chains = nc, iter = ni, warmup = nb, thin = nt,
seed = 123,
open_progress = FALSE,
control=list(adapt_delta=0.9))
print(out, digits = 3)
plot(out, par = c("beta0", "beta1", "p"))
traceplot(out, par = c("beta0", "beta1", "p"))