Hello,
I’m currently working on a hierarchical model for spruce budworm development times. I’ve constructed my model in Template Model Builder (which by default does maximum likelihood using Laplace approximation for Gaussian random effects), but am using Stan to do Bayesian/HMC analysis via the ‘tmbstan’ package (https://CRAN.R-project.org/package=tmbstan), which uses Stan/HMC/NUTS as a back-end sampler for models originally constructed in TMB. The sampler is indicating that there are 6 divergent transitions out of 18,000 samples (iter = 5000, warmup = 500, chains = 4, adapt_delta = 0.95). Increasing adapt_delta to 0.99 eliminates some but not all of the divergences. I have decentralized all of my latent variables to avoid funnelling, and I’ve read the Gelman et al. “Bayesian workflow” paper and all of the publicly posted advice I can find about resolving divergent transitions.
The data are interval-censored observations of times to development events. My model consists of a temperature-dependent development rate curve representing the median development rate of the population at a specified temperature (rho25, HA, TL, HL, TH, HH); a lognormal distribution of individual variation around the median (the variance parameter of this distribution, s_eps, is estimated/sampled) and a Cauchy iid random effect/latent variable that allows for lack of fit/variation around the parametric development rate curve for each treatment temperature (the scale parameter, s_upsilon, is also being sampled/estimated). The likelihood structure of the model follows Régnière et al. 2012 (https://doi.org/10.1016/j.jinsphys.2012.01.010), with a development rate curve from Schoolfield et al. 1981 (https://doi.org/10.1016/0022-5193(81)90246-0). The model code is as follows (written in C++, but analogous to Stan code):
#include <TMB.hpp>
#include <iostream>
#include <string>
template<class Type>
Type calc_TA(Type HL, Type HH, Type TL, Type TH) {
Type c3 = Type(0.0001987);
Type den, tphi;
den = c3*log(-HL/HH) + (HL/TL) - (HH/TH);
tphi = (HL - HH)/den;
return tphi;
}
template<class Type>
Type calc_pred(Type temp, Type rho25, Type HA, Type TL, Type HL, Type TH, Type HH, Type TA) {
Type c1 = Type(273);
Type c2 = TA;
Type c3 = Type(0.0001987);
Type tK, num, den1, den2, tau;
tK = temp + c1;
num = rho25 * tK/c2 * exp(HA/c3* (1/c2-1/tK));
den1 = exp(HL/c3 *(1/TL-1/tK));
den2 = exp(HH/c3 *(1/TH-1/tK));
tau = 1/(num/(1 + den1 + den2));
return tau;
}
template<class Type>
Type dnorm1(Type x){
return Type(1.0/sqrt(2.0*M_PI)) * exp(-Type(.5)*x*x);
}
TMB_ATOMIC_VECTOR_FUNCTION(
// ATOMIC_NAME
pnorm_log
,
// OUTPUT_DIM
1
,
// ATOMIC_DOUBLE
ty[0] = atomic::Rmath::Rf_pnorm5(tx[0],0,1,1,1);
,
// ATOMIC_REVERSE
Type W = ty[0]; // Function value from forward pass
Type DW = dnorm1(tx[0])/exp(W);
px[0] = DW * py[0];
)
template<class Type>
Type pnorm_log(Type x){
CppAD::vector<Type> tx(1);
tx[0] = x;
return pnorm_log(tx)[0];
}
template<class Type>
Type qcauchy(Type y, Type location, Type scale){
Type q = location + scale*tan(M_PI*(y - Type(0.5)));
return q;
}
template<class Type>
Type objective_function<Type>::operator() ()
{
DATA_IVECTOR(nobs);
DATA_IVECTOR(block);
DATA_VECTOR(time1);
DATA_VECTOR(time2);
DATA_VECTOR(temp1);
DATA_VECTOR(temp2);
DATA_VECTOR(time2d);
DATA_INTEGER(use_prior);
PARAMETER(rho25);
PARAMETER(HA);
PARAMETER(TL);
PARAMETER(HL);
PARAMETER(TH);
PARAMETER(HH);
PARAMETER(s_eps);
PARAMETER(s_upsilon);
PARAMETER_VECTOR(upsilon);
Type jnll = 0;
Type tpred1;
Type tpred2;
Type epsm1;
Type epsij;
Type pnormdiff;
Type nlogp;
Type epsm1_std;
Type epsij_std;
Type TA;
Type u_upsilon;
Type c_upsilon;
Type l_s_eps = log(s_eps);
Type l_s_upsilon = log(s_upsilon);
for (int i=0; i<time1.size(); i++) {
TA = calc_TA(HL, HH, TL, TH);
tpred1 = calc_pred(temp1(i), rho25, HA, TL, HL, TH, HH, TA);
tpred2 = calc_pred(temp2(i), rho25, HA, TL, HL, TH, HH, TA);
u_upsilon = pnorm(upsilon(block(i)), Type(0), Type(1));
c_upsilon = qcauchy(u_upsilon, Type(1), s_upsilon);
tpred1 *= c_upsilon;
tpred2 *= c_upsilon;
epsm1 = log(time1(i)/tpred1 + time2d(i)/tpred2);
epsij = log(time1(i)/tpred1 + time2(i)/tpred2);
epsm1_std = epsm1/s_eps;
epsij_std = epsij/s_eps;
Type pnorm_ij = pnorm_log(epsij_std);
Type pnorm_m1 = pnorm_log(epsm1_std);
pnormdiff = logspace_sub(pnorm_ij, pnorm_m1);
nlogp = nobs(i)*pnormdiff;
jnll -= nlogp;
}
jnll -= sum(dnorm(upsilon, Type(0), Type(1), 1));
if (use_prior == 1) {
jnll -= dgamma(rho25, Type(2), Type(0.25), 1);
jnll -= dgamma(-HL, Type(6), Type(2), 1);
jnll -= dgamma(HA, Type(5), Type(0.2), 1);
jnll -= dgamma(HH, Type(10), Type(3), 1);
jnll -= dnorm(TL, Type(284), Type(2), 1);
jnll -= dnorm(TH, Type(304), Type(2), 1);
jnll -= dnorm(l_s_eps, Type(-1.5), Type(0.1), 1);
jnll -= dnorm(l_s_upsilon, Type(-2.5), Type(0.05), 1);
}
return jnll;
}
There are no model diagnostic warnings other than the divergent transitions. Here is the pairs plot for the initial run (adapt_delta = 0.95):
Judging by the pairs plot, the upsilon random effect terms are likely the cause of the divergences; indeed, removing the lack of fit portion of the model eliminates them.
Any advice on how to proceed with these divergent transitions would be greatly appreciated.