Hello!
I am a graduate student in ecology studying phenological plasticity in plants, or how genotypes and environments interact to generate variation in traits (e.g. the timing of flowering).
Background
A common approach to this uses varying-intercept varying-slope regression with groupings by genotype, g as in:
Where \alpha_g is the effect of genotype g on an outcome, y, \theta is the effect of the environment (in agricultural studies, this is a given farm x year), and \lambda_g is interpreted as a genotype by environment interaction term.
A recent publication by Turbet Delof et al 2025 developed a Stan model for this framework. I’d like to use this model in my own work, but I am interested in specifying a hierarchical model for \theta such that \theta is partially determined by a set of observable environmental predictors, X and a varying intercept for the j^{th} farm. Something like:
Question
I took the Turbet Delof et al 2025 model and modified the Stan code to include a non-centered parameterization of the model on theta, and further vectorized the code (code pasted below). My question: I notice some correlation in posterior draws of \alpha_g and \theta_e in my model and I am having a hard time rationalizing why this might be. The two main possibilities that come to mind are:
- An identifiability issue.
- Most likely, just an artifact of the estimator (which is essentially just partitioning variance in y into G, E, GxE) so as theta increases, alpha necessarily decreases.
I think I am just looking for readings on the topic, or suggestions for some EDA/Diagnostics I might do mature my thinking on this. I am worried that correlation between two parameters like this indicates that any inferences made with this model might be less than robust? In the mean time, I will work on simulated data!
Thanks for reading!
–
data {
int n_obs; // number of observations
array[n_obs] real y; // trait of interest (outcome)
array[n_obs] int germplasm; // germplasm id (from 1 to n_germ)
int n_germ; // number of germplasms
array[n_obs] int environment; // environment id (from 1 to n_env)
int n_env; // number of environments (farm-years)
array[n_env] int farm; // farm id (from 1 to n_env)
int n_farm; // number of farms
int p; // number of climate predictors for theta
matrix[n_env, p] X_env; // trial x p matrix of climate predictors
// Provide priors for location of G, and variances in G, E, GxE, and env
real prior_scale_mu; // var for location of germplasm effect
real prior_location_mu; // location for germplasm effect
real prior_scale_sigma_alpha; // var for germplasm effect
real prior_scale_sigma_lambda; // var for GxE effect
real prior_scale_sigma_theta; // var for environment effect
real prior_scale_sigma_u; // var for varying farm intercepts
real prior_scale_sigma; // prior for residual noise
}
parameters {
real unscaled_mu;
real<lower = 0> unscaled_sigma_alpha;
real<lower = 0> unscaled_sigma_theta;
real<lower = 0> unscaled_sigma_lambda;
real<lower = 0> unscaled_sigma_u;
real<lower = 0> unscaled_sigma;
vector[n_germ] unscaled_alpha;
vector[n_germ] unscaled_lambda;
real<lower = 2> nu;
vector[n_env] raw_theta;
vector[n_farm] unscaled_u;
vector[p] beta_env;
}
transformed parameters {
real mu;
real sigma_alpha;
real sigma_lambda;
real sigma_theta;
real sigma_u;
real sigma;
vector[n_germ] alpha;
vector[n_germ] lambda;
vector[n_env] theta;
vector[n_farm] u;
// Level 1 - Environment
sigma_theta = prior_scale_sigma_theta * unscaled_sigma_theta;
sigma_u = prior_scale_sigma_u * unscaled_sigma_u;
u = sigma_u * unscaled_u;
theta = X_env * beta_env + u[farm] + sigma_theta * raw_theta;
// Level 2 - G, E, GxE
mu = prior_location_mu + prior_scale_mu * unscaled_mu;
sigma_alpha = prior_scale_sigma_alpha * unscaled_sigma_alpha;
sigma_lambda = prior_scale_sigma_lambda * unscaled_sigma_lambda;
sigma = prior_scale_sigma * unscaled_sigma;
alpha = mu + sigma_alpha * unscaled_alpha;
lambda = sigma_lambda * unscaled_lambda;
}
model {
// Priors
unscaled_sigma_theta ~ normal(0.0, 1.0);
beta_env ~ normal(0.0, 1.0);
nu ~ gamma(2, 0.1);
unscaled_mu ~ normal(0.0, 1.0);
unscaled_alpha ~ normal(0.0, 1.0);
unscaled_sigma_alpha ~ normal(0.0, 1.0);
unscaled_lambda ~ normal(0.0, 1.0);
unscaled_sigma_lambda ~ normal(0.0, 1.0);
unscaled_sigma_u ~ normal(0.0, 1.0);
unscaled_u ~ normal(0.0, 1.0);
// --- Likelihood ---
raw_theta ~ std_normal(); // Level 1 (non-centered)
{
vector[n_obs] theta_env = theta[environment];
vector[n_obs] alpha_g = alpha[germplasm];
vector[n_obs] lambda_g = lambda[germplasm];
vector[n_obs] mu_y = alpha_g + theta_env + lambda_g .* theta_env;
y ~ student_t(nu, mu_y, sigma); // Level 2
}
}