Hi, I’ve got a hierarchical model with measurement uncertainties, and I’m running into max treedepth issues. These tend to be accompanied with problems in the split R-hat. There don’t seem to be any other problems (<10/2000 divergences, ESS and E-BFMI are fine). I can increase the max treedepth and it’ll sometimes fix the problem, but the sampling becomes quite slow. I’d like to try out a non-centered parameterization, but am unsure of how to do this. I’ve read the the Stan User’s Guide section and various forums posts, but haven’t come across anything that would seem to work for me (all examples seem to have a parameter on the left side (e.g., parameter = means + L * alpha
, where L
is the covariance matrix and alpha ~ std_normal()
), but I’d like to have some data on the left side (e.g., observed_data = parameter + L * alpha
). I’ve included my code below. Thanks. I’d also appreciate any other suggestions for how to speed things up.
#include /shared_functions.stan
data {
int<lower=1> N; // total length of data
int<lower=0, upper=1> pos_obs[N]; // boolean array indicating whether position is observed
int<lower=0, upper=1> dist_obs[N]; // boolean array indicating whether distance is observed
int<lower=0, upper=1> pm_obs[N]; // boolean array indicating whether pm is observed
int<lower=0, upper=1> vlos_obs[N]; // boolean array indicating whether vlos is observed
// position measurements
row_vector[N] ra_measured;
row_vector[N] dec_measured;
row_vector[N] dist_measured;
// position measurement uncertainties
row_vector[N] ra_err;
row_vector[N] dec_err;
row_vector[N] dist_err;
// velocity measurements
row_vector[N] pmra_measured;
row_vector[N] pmdec_measured;
row_vector[N] vlos_measured;
// velocity measurement uncertainties
row_vector[N] pmra_err;
row_vector[N] pmdec_err;
row_vector[N] vlos_err;
vector[N] pos_corr;
vector[N] pm_corr;
vector[N] ra_pmra_corr;
vector[N] dec_pmdec_corr;
vector[N] ra_pmdec_corr;
vector[N] dec_pmra_corr;
int<lower=1> grainsize;
}
transformed data {
real deg2rad = pi() / 180.;
matrix[3, 3] R = [
[-0.05487395617553902, -0.8734371822248346, -0.48383503143198114],
[0.4941107627040048, -0.4448286178025452, 0.7469819642829028],
[-0.8676654903323697, -0.1980782408317943, 0.4559842183620723]
];
matrix[3, 3] H = [
[0.9999967207734917, 0.0, 0.002560945579906427],
[0.0, 1.0, 0.0],
[-0.002560945579906427, 0.0, 0.9999967207734917]
];
matrix[3, 3] A = H * R;
vector[3] offset = to_vector([8.122, 0., 0.]);
vector[3] solarmotion = to_vector([12.9, 245.6, 7.78]);
cholesky_factor_cov[4] pos_pm_cov_mats[N];
vector[4] pos_pm_measured[N];
real dummy[N] = rep_array(1, N);
for (i in 1:N) {
/* diag(S) * R * diag(S) */
/* where S is a vector of standard deviations (errs) */
/* and R is the correlation matrix */
// make correlation matrix
// diagonals are 1s
for (j in 1:4) {
pos_pm_cov_mats[i, j, j] = 1.;
}
// fill in with correlation data
pos_pm_cov_mats[i, 1, 2] = pos_corr[i];
pos_pm_cov_mats[i, 2, 1] = pos_corr[i];
pos_pm_cov_mats[i, 1, 3] = ra_pmra_corr[i];
pos_pm_cov_mats[i, 3, 1] = ra_pmra_corr[i];
pos_pm_cov_mats[i, 1, 4] = ra_pmdec_corr[i];
pos_pm_cov_mats[i, 4, 1] = ra_pmdec_corr[i];
pos_pm_cov_mats[i, 2, 3] = dec_pmra_corr[i];
pos_pm_cov_mats[i, 3, 2] = dec_pmra_corr[i];
pos_pm_cov_mats[i, 2, 4] = dec_pmdec_corr[i];
pos_pm_cov_mats[i, 4, 2] = dec_pmdec_corr[i];
pos_pm_cov_mats[i, 3, 4] = pm_corr[i];
pos_pm_cov_mats[i, 4, 3] = pm_corr[i];
// then convert it to a covariance matrix
pos_pm_cov_mats[i] = quad_form_diag(pos_pm_cov_mats[i], [ra_err[i], dec_err[i], pmra_err[i], pmdec_err[i]]);
// and apply cholesky decomposition
pos_pm_cov_mats[i] = cholesky_decompose(pos_pm_cov_mats[i]);
}
for (i in 1:N) {
pos_pm_measured[i] = [ra_measured[i], dec_measured[i], pmra_measured[i], pmdec_measured[i]]';
}
}
parameters {
real<lower=0.25, upper=0.7> p_gamma;
real<lower=20., upper=90.> phi0_raw;
real<lower=-(1-0.6254), upper=3.6254> beta_raw;
real<lower=max(to_vector([3, (-beta_raw + 0.6254) * (2. - p_gamma) + p_gamma / 2.])), upper=10.> p_alpha;
// "true" position parameters
row_vector<lower=0, upper=360.>[N] ra;
row_vector<lower=-90., upper=90.>[N] dec;
row_vector<lower=0, upper=max(dist_measured + 5 * dist_err)>[N] dist;
// "true" velocity parameters
row_vector<lower=min(pmra_measured - 5 * pmra_err), upper=max(pmra_measured + 5 * pmra_err)>[N] pmra;
row_vector<lower=min(pmdec_measured - 5 * pmdec_err), upper=max(pmdec_measured + 5 * pmdec_err)>[N] pmdec;
row_vector<lower=min(vlos_measured - 5 * vlos_err), upper=max(vlos_measured + 5 * vlos_err)>[N] vlos;
}
transformed parameters {
real p_phi0 = phi0_raw + 18.21597964588313;
real<lower=-3, upper=1> p_beta = -beta_raw + 0.6254;
row_vector[N] ra_rad = ra * deg2rad;
row_vector[N] dec_rad = dec * deg2rad;
matrix[4, N] pos_pm = [ra, dec, pmra, pmdec];
matrix[3, N] vels_sph;
matrix[3, N] y;
vels_sph = transform_vels_vec(ra_rad, dec_rad, dist, pmra, pmdec, vlos, R, H, offset, solarmotion) ./ 100.;
row_vector[N] vt_sq = square(vels_sph[2]) + square(vels_sph[3]);
y[2] = sqrt(vt_sq);
y[1] = sqrt(square(vels_sph[1]) + vt_sq);
matrix[3, N] pos_gc = transform_pos_vec(ra_rad, dec_rad, dist, R, H);;
y[3] = sqrt(columns_dot_self(pos_gc));;
}
model {
p_gamma ~ normal(0.4335, 0.0445677);
phi0_raw ~ gamma(48.884347, 1./0.9755005176256963);
beta_raw ~ gamma(11.184, 1./0.092386);
p_alpha ~ normal(3.472888, 0.02051);
// observation process is here --------------------------
// how do i make this non-centered?
for (i in 1:N) {
pos_pm_measured[i] ~ multi_normal_cholesky(pos_pm[i], pos_pm_cov_mats[i]);
}
plx_measured ~ normal(plx, plx_err);
vlos_measured ~ normal(vlos, vlos_err);
// ---------------------------------------
y ~ df_vec(p_phi0, p_gamma, p_alpha, p_beta);
}