Hello dynamic forum participants,
I’ve built a somewhat fussy hierarchical changepoint model that is meant to quantify the migration animals on a population level (complete code below - any feedback on making it more efficient / smarter / faster is VERY welcome!) There are some nuances because there are shifting autocorrelations and the whole thing needs to be robust to irregular sampling, and the break points have to be smoothed - but at heart everything is a straightforward hierarchical Gaussian.
It works well for simulated data. But on real data, I can’t escape the dreaded: ejecting initial value: Log probability evaluates to log(0), i.e. negative infinity.
In an epic struggle to debug I have:
a) introduced initial values for priors [which can be quite informative in this case],
b) provided totally reasonable initial conditions,
c) printed the log-probabilities (and double-checked them outside of STAN), which are all very reasonable and non-zero.
d) torn out many of few remaining hairs.
Specifically, the complete set of sampling commands in the model block is:
// Sampling
t ~ normal(t_mean, t_sd);
dt ~ normal(dt_mean, dt_sd);
for(i in 1:k){
col(mu1, i) ~ multi_normal(mu1_mean, mu1_Sigma);
col(mu2, i) ~ multi_normal(mu2_mean, mu2_Sigma);
}
x_res ~ normal(mux_res, sigma_res); // where mux_res and sigma_res are n-length vectors
y_res ~ normal(muy_res, sigma_res);
and, when I print the values, via:
print("x_lpd: ", normal_lpdf(x_res | mux_res, sigma_res));
print("y_lpd: ", normal_lpdf(y_res | muy_res, sigma_res));
print("t_lpd: ", normal_lpdf(t | t_mean, t_sd));
print("dt_lpd: ", normal_lpdf(dt | dt_mean, dt_sd));
for(i in 1:k){
print("mu1_lpd ", i, ": ", multi_normal_lpdf(col(mu1, i) | mu1_mean, mu1_Sigma));
print("mu2_lpd ", i, ": ", multi_normal_lpdf(col(mu2, i) | mu2_mean, mu2_Sigma));
}
I get all perfectly reasonable numbers, no log(0’s) or nan’s anywhere to be seen.
I haven’t generated an MWE (but could put one together if needed - on simulated data).
So what else could be going wrong? Where else could it be drawing a log(0) from? What would a next debugging step be?
I also have some questions about where to place certain variables (e.g. parameters vs. transformed parameters), but I’m guessing an expert perusing the code might see if there are better ways than what I cobbled together.
Thanks kindly for any advice!
Elie
Complete model code below:
functions {
// this computes the expression for the coefficient on the variance at lag in an AR(1) process:
// \sum_{i=1}^lag phi^{i-1}
real getphilag(real phi, real lag) {
real philag;
int i;
philag = 0;
i = 0;
while (i <= lag) {
philag = philag + pow(phi, (lag-1)*2);
i = i + 1;
}
return philag;
}
// this function (at beta, say, > 10) approximates a cut function that rises from -delta to delta with slope 1/ (2*delta) from
real squash(real x, real delta, real beta){
real x_squashed;
x_squashed = (1.0 /(2.0 * delta*beta)) * log( (1.0 + exp(beta*(x + delta))) / (1.0 + exp(beta*(x - delta))) );
return x_squashed;
}
// this function sets up a step up and step back down function: s1-s2-s1 at times t1 and t2
real softdoublestep(real x, real s1, real s2, real t1, real t2, real k){
real x_doublestep;
x_doublestep = s1 + (s2-s1)/(1+exp(-2*k*(x-t1))) - (s2-s1)/(1+exp(-2*k*(x-t2)));
return x_doublestep;
}
}
data {
int<lower=0> n; // number of total observations
int k; // n. of individuals
real y[n]; // x-coordinate
real x[n]; // y-coordinate
real yday[n]; // day of year
real dday[n-k]; // yday[i] - yday[i-1] ... for EACH individual
int<lower=1,upper=k> id[n]; // ID of caribou
real inits[10]; // initial values
int printpars;
int printvals;
}
parameters {
// migration timing
real<lower = min(yday), upper = max(yday)> t_mean;
real<lower = 0> t_sd; // , upper = 30
// migration duration
real<lower = 0> dt_mean;
real<lower = 0> dt_sd;
// population mean
vector[2] mu1_mean;
vector[2] mu2_mean;
real<lower = 0> mux1_sigma;
real<lower = 0> muy1_sigma;
real<lower=-1, upper=1> rho1;
real<lower = 0> mux2_sigma;
real<lower = 0> muy2_sigma;
real<lower=-1, upper=1> rho2;
//OU parameters
real<lower = 0> sigma_r;
real<lower = -1, upper = 1> phi_r;
real<lower = 0> sigma_m;
real<lower = -1, upper = 1> phi_m;
// population level parameters
matrix[2,k] mu1;
matrix[2,k] mu2;
// row_vector[k] mu1[2];
// row_vector[k] mu2[2];
vector<lower = min(yday), upper = max(yday)>[k] t;
vector<lower = 0>[k] dt;
}
transformed parameters {
cov_matrix[2] mu1_Sigma;
cov_matrix[2] mu2_Sigma;
// convert sigmas to usable Variance-Covariance matrix
mu1_Sigma[1,1] = mux1_sigma;
mu1_Sigma[2,2] = muy1_sigma;
mu1_Sigma[1,2] = sqrt(mux1_sigma*muy1_sigma) * rho1;
mu1_Sigma[2,1] = sqrt(mux1_sigma*muy1_sigma) * rho1;
mu2_Sigma[1,1] = mux2_sigma;
mu2_Sigma[2,2] = muy2_sigma;
mu2_Sigma[1,2] = sqrt(mux2_sigma*muy2_sigma) * rho2;
mu2_Sigma[2,1] = sqrt(mux2_sigma*muy2_sigma) * rho2;
}
model {
// ancillary variables
vector[n] x_hat;
vector[n] y_hat;
vector[n-k] x_res;
vector[n-k] y_res;
vector[n-k] sigma_move;
vector[n-k] phi_move;
vector[n-k] mux_res;
vector[n-k] muy_res;
vector[n-k] sigma_res;
// individual values
int l;
int m;
// PRIORS
mu1_mean[1] ~ normal(inits[1], 30.0);
mu1_mean[2] ~ normal(inits[2], 30.0);
mu2_mean[1] ~ normal(inits[3], 30.0);
mu2_mean[2] ~ normal(inits[4], 30.0);
mux1_sigma ~ lognormal(log(inits[5]), log(30.0));
mux2_sigma ~ lognormal(log(inits[6]), log(30.0));
muy1_sigma ~ lognormal(log(inits[7]), log(30.0));
muy2_sigma ~ lognormal(log(inits[8]), log(30.0));
t_mean ~ normal(inits[9], 20.0) T[0, 365];
t_sd ~ exponential(1/50.0);
dt_mean ~ normal(inits[10], 5.0) T[0,100];
dt_sd ~ exponential(1/50.0);
// major transformations
l = 1;
m = 1;
while (l <= n && m <= (n-k)){
x_hat[l] = mu1[1,id[l]] + (mu2[1,id[l]] - mu1[1,id[l]]) * squash(yday[l] - t[id[l]] - dt[id[l]]/2, dt[id[l]]/2, 10.0);
y_hat[l] = mu1[2,id[l]] + (mu2[2,id[l]] - mu1[2,id[l]]) * squash(yday[l] - t[id[l]] - dt[id[l]]/2, dt[id[l]]/2, 10.0);
if(l>1 && id[l] == id[l-1]){
x_res[m] = x[l] - x_hat[l];
y_res[m] = y[l] - y_hat[l];
sigma_move[m] = softdoublestep(yday[l], sigma_r, sigma_m, t[id[l]], t[id[l]] + dt[id[l]], 10.0);
phi_move[m] = softdoublestep(yday[l], phi_r, phi_m, t[id[l]], t[id[l]] + dt[id[l]], 10.0);
mux_res[m] = (phi_move[m]^dday[m])*(x[l-1] - x_hat[l-1]);
muy_res[m] = (phi_move[m]^dday[m])*(y[l-1] - y_hat[l-1]);
sigma_res[m] = sigma_move[m] * sqrt(getphilag(phi_move[m], dday[m]));
m = m + 1;
}
l = l + 1;
}
print("pre-sampling target: ", target());
// Sampling
t ~ normal(t_mean, t_sd);
dt ~ normal(dt_mean, dt_sd);
for(i in 1:k){
col(mu1, i) ~ multi_normal(mu1_mean, mu1_Sigma);
col(mu2, i) ~ multi_normal(mu2_mean, mu2_Sigma);
}
x_res ~ normal(mux_res, sigma_res);
y_res ~ normal(muy_res, sigma_res);
print("post-sampling target: ", target());
if(printpars == 1){
print("Parameters:")
print("mu1_mean: ", mu1_mean);
print("mu2_mean: ", mu2_mean);
print("mu1_Sigma: ", mu1_Sigma);
print("mu2_Sigma: ", mu2_Sigma);
print("t_mean: ", t_mean);
print("dt_mean: ", dt_mean);
print("phi_r: ", phi_r, "; phi_m: ", phi_m);
print("sigma_r: ", sigma_r, "; sigma_m: ", sigma_m);
print("mu1: ", mu1);
print("mu2: ", mu2);
print("t: ", t);
print("dt: ", dt);
print("x_lpd: ", normal_lpdf(x_res | mux_res, sigma_res));
print("y_lpd: ", normal_lpdf(y_res | muy_res, sigma_res));
print("t_lpd: ", normal_lpdf(t | t_mean, t_sd));
print("dt_lpd: ", normal_lpdf(dt | dt_mean, dt_sd));
for(i in 1:k){
print("mu1_lpd ", i, ": ", multi_normal_lpdf(col(mu1, i) | mu1_mean, mu1_Sigma));
print("mu2_lpd ", i, ": ", multi_normal_lpdf(col(mu2, i) | mu2_mean, mu2_Sigma));
}
}
if(printvals == 1){
print("Values:")
print("x_res: ", x_res);
print("y_res: ", y_res);
print("mux_res: ", mux_res);
print("muy_res: ", muy_res);
print("sigma_res: ", sigma_res);
}
}