I am building a nonlinear multilevel model fit to simulated data. My initial version had problems with divergent transitions, so I switched to non-centered parameterization, heavily leaning on this forum post for guidance: Centred vs. non-centred parametrisation with lognormal likelihood - #9 by bbbales2
The model now fails to initialize, and banging my head against this wall is no longer productive. Can anyone please take a look to see if there is an obvious place that I went wrong?
The model is meant to estimate the effects of harvester (q_a), date (q_d), and site (q_l) on catch. It takes the form of the catch equation:
C = E*q*N^\beta
The catchability term, q, is then broken down into additive random effects.
q_{total}=q_{mean}+q_{harvester}+q_{date}+q_{site}
I simulated the data in R with this code, where for each date (d), two randomly selected sites (l) were each harvested by three randomly selected harvesters (a).
library(dplyr)
set.seed(395)
catch.fun.err<-function(effort, q_mu, q_d, q_a, q_l, popDensity, beta, error){
catch=(effort*(q_mu+q_d+q_a+q_l)*popDensity^beta)*error
return(catch)
}
# 90 unique dates
D<-90
# 20 unique anglers
A<-20
# 10 unique lakes
L<-10
q_d<-rlnorm(D, -1, 0.5)
q_a<-rlnorm(A, -2, 0.2)
q_l<-rlnorm(L, -2, 0.01)
q_mu<-0.2
popDensity<-rlnorm(L, meanlog=2, sdlog=0.2)
beta<-0.25
# D, A, and L indexes and their q values
dates<-data.frame(D=seq(1:D), q_d=q_d)
anglers<-data.frame(A=seq(1:A), q_a=q_a)
lakes<-data.frame(L=seq(1:L), q_l=q_l, popDensity=popDensity)
# sample randomly with replacement from anglers and lakes for each date. then left_join approprate q values by A, D, and L selection
lakeSamples<-sample(lakes$L, 180, replace=TRUE)
anglerSamples<-sample(anglers$A, 540, replace=TRUE)
# 2 lakes visited each day, three anglers at each lake
fake.df.hier<-data.frame(D=rep(dates$D, each=6),
L=rep(lakeSamples, each=3),
A=anglerSamples)%>%
dplyr::left_join(dates, by="D")%>%
dplyr::left_join(anglers, by="A")%>%
dplyr::left_join(lakes, by="L")%>%
dplyr::mutate(beta=rep(0.25),
q_mu=rep(0.2),
effort=rlnorm(540, meanlog=1.25, sdlog=0.25),
error=rlnorm(540, meanlog=0, sdlog=0.3),
lmbCatch=floor(catch.fun.err(effort, q_mu, q_d, q_a, q_l, popDensity, beta, error)))
fake.data.hier<-list(N=nrow(fake.df.hier),
D=D,
L=L,
A=A,
DD=fake.df.hier$D,
LL=fake.df.hier$L,
AA=fake.df.hier$A,
effort=fake.df.hier$effort,
popDensity=fake.df.hier$popDensity,
lmbCatch=fake.df.hier$lmbCatch)
And this is the model that I fit to it:
data {
// number of observations
int<lower=1> N;
// number of anglers
int<lower=1> A;
// number of unique dates
int<lower=1> D;
// number of lakes
int<lower=1> L;
// all observations of catch (response)
array[N] int<lower=0> lmbCatch;
// indexing observations by anglers, dates, and lakes
array[N] int<lower=1, upper=A> AA;
array[N] int<lower=1, upper=D> DD;
array[N] int<lower=1, upper=L> LL;
// all observations of effort (covariate)
array[N] real<lower=0> effort;
// all estimates of population density (covariate)
array[N] real<lower=0> popDensity;
}
// phi is inverse overdispersion param
parameters {
// I want different estimates of q for different anglers, dates, and lakes, as well as an overall mean q
// without these zero lower bounds, initialization fails
array[A] real<lower=0> q_a_raw;
array[D] real<lower=0> q_d_raw;
array[L] real<lower=0> q_l_raw;
real<lower=0> q_mu;
real beta;
real phi;
// hyperparameters
real mu_q_a;
real<lower=0> sigma_q_a;
real mu_q_d;
real<lower=0> sigma_q_d;
real mu_q_l;
real<lower=0> sigma_q_l;
}
transformed parameters{
array[N] real<lower=0> catchHat;
array[N] real logCatchHat;
array[A] real log_q_a;
array[D] real log_q_d;
array[L] real log_q_l;
array[A] real<lower=0> q_a;
array[D] real<lower=0> q_d;
array[L] real<lower=0> q_l;
for(i in 1:N){
catchHat[i] = effort[i] .* (q_mu + q_a[AA[i]] + q_d[DD[i]] + q_l[LL[i]]) .* popDensity[i]^beta;
}
logCatchHat = log(catchHat);
log_q_a = log(q_a);
log_q_d = log(q_d);
log_q_l = log(q_l);
for(a in 1:A){
log_q_a[A] = mu_q_a + sigma_q_a * q_a_raw[A];
}
for(d in 1:D){
log_q_d[D] = mu_q_d + sigma_q_d * q_d_raw[D];
}
for(l in 1:L){
log_q_l[L] = mu_q_l + sigma_q_l * q_l_raw[L];
}
}
model {
lmbCatch~neg_binomial_2_log(logCatchHat, phi);
// prior on logCatchHat. Not sure if I need this
logCatchHat~lognormal(0,1);
q_a_raw~std_normal();
q_d_raw~std_normal();
q_l_raw~std_normal();
// these need to be above zero
q_mu~lognormal(0,0.5);
// these need to be able to include negatives
mu_q_a~normal(0,1);
mu_q_d~normal(0,1);
mu_q_l~normal(0,1);
sigma_q_a~lognormal(0,1);
sigma_q_d~lognormal(0,1);
sigma_q_l~lognormal(0,1);
phi~inv_gamma(0.4, 0.3);
beta~lognormal(0,0.5);
}