Hi everyone,
I have been working on modelling the north-south migration of GPS-tagged animals over time, using a Hilbert space Gaussian process approach. Following my previous post here, I have been incorporating the individual animal ID (AID
) in a similar manner to the “weekday with increasing magnitude” effect in the Birthdays case study, i.e. the individual effects (beta_AID
) can vary over time according to a Matérn 1/2 kernel (which is shared among all individuals).
However, I am encountering convergence problems when fitting the model to data. A subset of the parameters have very high Rhat and very low ess_bulk:
Examining traceplots for the sigma
and lengthscale
parameters, it looks like there is poor mixing and/or multimodality
It also looks like some of the some of the individual-level effects are multimodal:
I have tried several things to fix this, including enforcing a sum-to-zero constraint on beta_AID
(inspired by this post, both via f_AID = append_row(beta_AID, -sum(beta_AID));
and target += normal_lpdf(sum(b) | 0, 0.001)
), and messing about with different priors on the lengthscales, sigmas, and beta_AID
, but have not yet managed to improve things.
However I am fairly sure that the beta_AID
and f3
part of the model is to blame, as f1
(squared exponential) and f2
(periodic) work fine together, and including only f1
+f3
or f1
+f3
doesn’t seem to work either.
Any advice about how I can improve the model would be greatly appreciated!
The full model code:
functions{
vector diagSPD_EQ(real alpha, real rho, real L, int M) {
return sqrt((alpha) * sqrt(2*pi()) * rho * exp(-0.5*(rho*pi()/2/L)^2 * linspaced_vector(M, 1, M)^2));
}
vector diagSPD_periodic(real alpha, real rho, int M) {
real a = 1/rho^2;
array[M] int one_to_M;
for (m in 1:M) one_to_M[m] = m;
vector[M] q = sqrt(alpha * 2 / exp(a) * to_vector(modified_bessel_first_kind(one_to_M, a)));
return append_row(q,q);
}
vector diagSPD_Matern12(real alpha, real rho, real L, int M) {
vector[M] indices = linspaced_vector(M, 1, M);
real factor = 2;
vector[M] denom = rho * ((1 / rho)^2 + pow(pi() / 2 / L * indices, 2));
return alpha * sqrt(factor * inv(denom));
}
matrix PHI(int N, int M, real L, vector x) {
return sin(diag_post_multiply(rep_matrix(pi()/(2*L) * (x+L), M), linspaced_vector(M, 1, M)))/sqrt(L);
}
matrix PHI_periodic(int N, int M, real w0, vector x) {
matrix[N,M] mw0x = diag_post_multiply(rep_matrix(w0*x, M), linspaced_vector(M, 1, M));
return append_col(cos(mw0x), sin(mw0x));
}
}
data{
int N; // Number of observations
array[N] real y; // Northing
array[N] real x; // Time (days)
array[N] int AID; // Animal ID for each observation
int N_animals; // Total number of animals
real<lower=0> c_f1; // factor c to determine the boundary value L
int<lower=1> M_f1; // number of basis functions for smooth function
int<lower=1> J_f2; // number of cos and sin functions for periodic function
real<lower=0> c_f3; // factor c to determine the boundary value L
int<lower=1> M_f3; // number of basis functions for smooth function
}
transformed data {
// Normalize data
real xmean = mean(x);
real ymean = mean(y);
real xsd = sd(x);
real ysd = sd(y);
vector[N] xn = (to_vector(x) - xmean)/xsd;
vector[N] yn = (to_vector(y) - ymean)/ysd;
// Basis functions for f1
real L_f1 = c_f1*max(xn);
matrix[N,M_f1] PHI_f1 = PHI(N, M_f1, L_f1, xn);
// Basis functions for f2
real period_year = 365.25/xsd;
matrix[N,2*J_f2] PHI_f2 = PHI_periodic(N, J_f2, 2*pi()/period_year, xn);
// Basis functions for f3
real L_f3= c_f3*max(xn);
matrix[N,M_f3] PHI_f3 = PHI(N, M_f3, L_f3, xn);
// Concatenated basis functions - f1 and f2
matrix[N,M_f1+2*J_f2] PHI_f = append_col(PHI_f1, PHI_f2);
}
parameters {
vector[M_f1] beta_f1; // the basis functions coefficients for f1
vector[2*J_f2] beta_f2; // the basis functions coefficients for f2
vector[M_f3] beta_f3; // the basis functions coefficients for f1
ordered[N_animals-1] beta_AID; // individual-level offset
real<lower=0> lengthscale_f1; // lengthscale for f1
real<lower=0> lengthscale_f2; // lengthscale for f2
real<lower=0> lengthscale_f3; // lengthscale for f3
real<lower=0> sigma_f1; // scale of f1
real<lower=0> sigma_f2; // scale of f2
real<lower=0> sigma_f3; // scale of f3
real<lower=0> sigma; // residual scale
}
model {
// spectral densities for f1, f2, and f3
vector[M_f1] diagSPD_f1 = diagSPD_EQ(sigma_f1, lengthscale_f1, L_f1, M_f1);
vector[2*J_f2] diagSPD_f2 = diagSPD_periodic(sigma_f2, lengthscale_f2, J_f2);
vector[M_f3] diagSPD_f3 = diagSPD_Matern12(sigma_f3, lengthscale_f3, L_f3, M_f3);
// priors
beta_f1 ~ normal(0, 1);
beta_f2 ~ normal(0, 1);
beta_f3 ~ normal(0, 1);
beta_AID ~ normal(0, 0.1);
lengthscale_f1 ~ lognormal(log(1800/xsd), 0.3);
lengthscale_f2 ~ lognormal(log(365/xsd), 0.3);
lengthscale_f3 ~ lognormal(log(30/xsd), 0.3);
sigma_f1 ~ normal(0, 1);
sigma_f2 ~ normal(0, 1);
sigma_f3 ~ normal(0, 0.1);
sigma ~ normal(0, 0.1);
// Individual-level offset
vector[N_animals] f_AID = append_row(0, beta_AID);
vector[N] f3 = PHI_f3 * (diagSPD_f3 .* beta_f3);
vector[N] intercept = 0.0 + exp(f3) .* f_AID[AID];
// Model
yn ~ normal_id_glm(PHI_f,
intercept,
append_row(diagSPD_f1 .* beta_f1, diagSPD_f2 .* beta_f2),
sigma);
}
generated quantities {
vector[N] f1;
vector[N] f2;
vector[N] f3;
vector[N] f;
vector[N_animals] f_AID = append_row(0, beta_AID);
vector[N] log_lik;
{
// spectral densities for f1, f2, and f3
vector[M_f1] diagSPD_f1 = diagSPD_EQ(sigma_f1, lengthscale_f1, L_f1, M_f1);
vector[2*J_f2] diagSPD_f2 = diagSPD_periodic(sigma_f2, lengthscale_f2, J_f2);
vector[M_f3] diagSPD_f3 = diagSPD_Matern12(sigma_f3, lengthscale_f3, L_f3, M_f3);
// functions scaled back to original scale
f1 = (0.0 + PHI_f1 * (diagSPD_f1 .* beta_f1))*ysd;
f2 = (PHI_f2 * (diagSPD_f2 .* beta_f2))*ysd;
f3 = (exp(PHI_f3 * (diagSPD_f3 .* beta_f3)) .* f_AID[AID])*ysd;
f = f1 + f2 + f3 + ymean;
// log_liks for loo
for (n in 1:N) log_lik[n] = normal_lpdf(y[n] | f[n], sigma);
}
}