I’m fitting a multivariate hierarchical stochastic factor model with time-varying factor standard deviations, and I plan to extend it to the case where the factor loadings are stochastic as well. I’m also hoping to get better estimates of the covariance matrix of the factor standard deviations.
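In equations (matching the Stan code below), the model is

$$
x_t \sim \mathcal{N}\!\left(\beta f_t,\; \sigma_x^2 I\right), \qquad
f_t = \exp(h_t) \odot z_t, \quad z_t \sim \mathcal{N}(0, I),
$$

$$
h_t \sim \mathcal{N}\!\left(\mu_h + \phi \odot h_{t-1},\; \Sigma_h\right), \qquad
\Sigma_h = \operatorname{diag}(\tau)\, L_\Omega L_\Omega^\top \operatorname{diag}(\tau),
$$

where $\beta$ is a $P \times F$ loading matrix constrained to be lower triangular with unit diagonal, $h_t$ are the log factor standard deviations (log_f_sd), $\mu_h$ is log_f_sd_loc, $\phi$ is log_f_sd_scale, $\sigma_x$ is x_sd, and $h_1 \sim \mathcal{N}(\mu_h, \Sigma_h)$.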
To avoid divergences and speed things up, I’ve set init_r to 0.5, adapt_delta to 0.99, max_treedepth to 15, and step_size to 0.25. However, I still get divergences and have no idea how to fix them.
data {
  int<lower=1> P;   // number of dimensions
  int<lower=1> F;   // number of latent factors
  int<lower=1> TS;  // number of time steps
  real disc;        // discount
  vector[P] x[TS];
}
transformed data {
  vector[F] L_diag = rep_vector(1.0, F);
  int<lower=1> beta_l = F * (P - F) + F * (F - 1) / 2; // number of lower-triangular, non-zero loadings
}
parameters {
  // reparameterisation parameters
  vector[F] z_log_f_sd[TS]; // reparameterised log latent factor standard deviations
  vector[F] z_fac[TS];      // reparameterised latent factors
  // parameters
  vector[beta_l] L_lower_init; // initial lower diagonal loadings
  vector[F] log_f_sd_loc;      // latent factor standard deviation transform location
  vector[F] log_f_sd_scale;    // latent factor standard deviation transform scale
  // vector[P] x_loc;
  // vector[P] x_scale;
  // priors
  cholesky_factor_corr[F] L_Omega;
  vector<lower=0,upper=pi()/2>[F] tau_unif;
  // vector<lower=0>[F] chol_log_f_sd_sd; // diagonal standard deviation
  real<lower=0> x_sd; // x standard deviation
  // vector[F] log_f_sd_sd; // diagonal standard deviation
  // real log_x_sd; // x standard deviation
}
transformed parameters {
  vector[F] log_f_sd[TS];
  matrix[P, F] beta;
  vector[F] mu[TS];
  vector[F] tau = tan(tau_unif);
  cholesky_factor_cov[F] chol_log_f_sd_sd = diag_pre_multiply(tau, L_Omega);
  // vector[F] chol_log_f_sd_sd = exp(log_f_sd_sd);
  // real x_sd = exp(log_x_sd);
  {
    int idx = 1;
    for (j in 1:F) {
      beta[j, j] = L_diag[j]; // set positive diagonal loadings
      for (k in (j+1):F) {
        beta[j, k] = 0; // set upper triangle values to 0
      }
      for (i in (j+1):P) {
        beta[i, j] = L_lower_init[idx]; // set lower diagonal loadings
        idx += 1;
      }
    }
  }
  for (t in 1:TS) {
    // log_f_sd[t] = log_f_sd_loc + chol_log_f_sd_sd .* z_log_f_sd[t];
    log_f_sd[t] = log_f_sd_loc + chol_log_f_sd_sd * z_log_f_sd[t];
    if (t > 1) {
      log_f_sd[t] += log_f_sd_scale .* log_f_sd[t-1];
    }
    // mu[t] = x_loc + x_scale .* (beta * (exp(log_f_sd[t]) .* z_fac[t]));
    mu[t] = beta * (exp(log_f_sd[t]) .* z_fac[t]);
  }
}
model {
  // priors
  L_lower_init ~ std_normal();
  log_f_sd_loc ~ normal(0, 0.5);
  log_f_sd_scale ~ normal(0, 0.5);
  // x_loc ~ std_normal();
  // x_scale ~ std_normal();
  L_Omega ~ lkj_corr_cholesky(4);
  // chol_log_f_sd_sd ~ lognormal(-1, 1);
  x_sd ~ lognormal(2, 0.1);
  // log_f_sd_sd ~ normal(-2, 0.1);
  // log_x_sd ~ normal(-2, 0.1);
  // likelihood
  for (t in 1:TS) {
    z_log_f_sd[t] ~ std_normal();
    z_fac[t] ~ std_normal();
    // if (is_nan(mu[t][1]) || is_nan(mu[t][2]) || is_nan(mu[t][3]) || is_nan(mu[t][4]) || is_nan(mu[t][5]) ||
    //     is_nan(mu[t][6]) || is_nan(mu[t][7]) || is_nan(mu[t][8]) || is_nan(mu[t][9]) || is_nan(mu[t][10])) {
    //   print("chol_log_f_sd_sd", chol_log_f_sd_sd);
    //   print("log_f_sd_loc", log_f_sd_loc);
    //   print("log_f_sd_scale", log_f_sd_scale);
    //   print("z_log_f_sd", z_log_f_sd[t]);
    //   print("log_f_sd", log_f_sd[t-1]);
    //   print("log_f_sd", log_f_sd[t]);
    //   print("f_sd", exp(log_f_sd[t]));
    // }
    x[t] ~ normal(mu[t], x_sd);
  }
}
I’m thinking of concatenating all of the auxiliary reparameterisation parameters into one long vector (or a matrix with one column per time step) so that the ~ std_normal() priors can be vectorised, but I’m not sure how much that would help; a rough sketch of what I mean is below. As far as I can tell I’m not doing any redundant computations at the moment. I’m also unsure whether I could whiten my parameterisations further to make them more stable.
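For example, the concatenation I have in mind would look roughly like this (untested sketch, only showing the parts that change; the columns of the z matrices play the role of the per-time-step vectors, and the standard-normal priors get vectorised):

parameters {
  matrix[F, TS] z_log_f_sd; // one column per time step
  matrix[F, TS] z_fac;
  // ... remaining parameters as before ...
}
transformed parameters {
  // ... declarations and loadings as before, but using columns, e.g.
  // log_f_sd[t] = log_f_sd_loc + chol_log_f_sd_sd * col(z_log_f_sd, t);
  // mu[t] = beta * (exp(log_f_sd[t]) .* col(z_fac, t));
}
model {
  // vectorised standard-normal priors replace the per-t statements
  to_vector(z_log_f_sd) ~ std_normal();
  to_vector(z_fac) ~ std_normal();
  // ... priors and likelihood loop over t as before ...
}

As far as I can tell this is just a re-packaging of the same parameters, so I suspect it would only reduce autodiff overhead rather than change the posterior geometry that causes the divergences.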
Any help and performance tips would be much appreciated.