I’m running into an issue when I try to run the following Stan model (pystan 3.3.0, python 3.8.8) with a fairly large correlation matrix (J = 100); when I subset down my data such that the correlation matrix is smaller (J = 10), I’m able to run without issue.
The specific error I’m recovering is RuntimeError: Initialization failed.
; I’ve tried initializing my tau
and Omega
parameters with values .005, 1, and zero, to no effect. I’ve additionally reparametrized my model in terms of cholesky_factor_corr
/lkj_corr_cholesky
for omega, and multi_normal_cholesky
for beta, but continue to receive the same Initialization failed
error message.
Appreciate any guidance folks might have here.
data {
// Training Data (Individual-Level)
/// Model Metadata
int<lower=3> K; // Number of target classes
int<lower=0> N; // Number of interviews
int<lower=1> J; // Number of groups
int<lower=0> L; // Number of group-level predictors
//// Number of Levels for Varying Intercepts
int<lower=0> N_a;
int<lower=0> N_b;
int<lower=0> N_c;
int<lower=0> N_d;
int<lower=0> N_e;
int<lower=0> N_f;
/// Interview Targets
array[N] int<lower=1, upper=K> y; // Target
/// Varying Intercept Levels
array[N] int<lower=1,upper=N_a> a;
array[N] int<lower=1,upper=N_b> b;
array[N] int<lower=1,upper=N_c> c;
array[N] int<lower=1,upper=N_d> d;
array[N] int<lower=1,upper=N_e> e;
array[N] int<lower=1,upper=N_f> f;
//// Group-Level Predictors
array[N] int<lower=1, upper=J> jj; // Group assignment for individual
array[J] row_vector[L] u_jj; // Groupl-Level design matrix
}
parameters {
/// Prior Variances for Varying Intercepts
real<lower=0> sigma_a;
real<lower=0> sigma_b;
real<lower=0> sigma_c;
real<lower=0> sigma_d;
real<lower=0> sigma_e;
real<lower=0> sigma_f;
real<lower=0> sigma_jj;
/// Varying Intercepts
vector[K] alpha_zero;
vector<multiplier=sigma_a>[K] alpha_a;
vector<multiplier=sigma_b>[K] alpha_b;
array[N_c] vector<multiplier=sigma_c>[K] alpha_c;
array[N_d] vector<multiplier=sigma_d>[K] alpha_d;
array[N_e] vector<multiplier=sigma_e>[K] alpha_e;
array[N_f] vector<multiplier=sigma_f>[K] alpha_f;
array[J] vector<multiplier=sigma_jj>[K] alpha_jj;
// Group-Level Preictors
matrix[L,K] gamma; // Predictor Coefficient Matrix
array[K] corr_matrix[J] Omega; // Prior correlation matrix
array[K] vector<lower=0>[J] tau; // Prior correlation scale
matrix[J,K] beta; // Modeled group-level slopes
}
model {
// Group-Level Effect Coefficients
to_vector(gamma) ~ normal(0, 5);
// Declare Local Scope for Vectorized Group-Level Slope Prior Means
array[J] vector[K] u_gamma_jj;
for (j in 1:J) {
u_gamma_jj[j] = (u_jj[j] * gamma)';
}
/// Loop over Submodels
for (k in 1:K){
/// Declare Hyperpriors
tau[k] ~ cauchy(0, 2.5);
Omega[k] ~ lkj_corr(2);
// Group-Level Slopes
to_vector(beta[,k]) ~ multi_normal(to_vector(u_gamma_jj[,k]), quad_form_diag(Omega[k], tau[k]));
}
// Materialize Group Effects
array[N_ind] row_vector[K] x_b;
for (n in 1:N_ind){
x_b[n] = beta[jj[n],];
}
/// Varying Intercept Prior SDs
{sigma_a, sigma_b, sigma_c, sigma_d, sigma_e, sigma_f, sigma_jj} ~ normal(0,1);
// Varying Intercepts
alpha_zero ~ normal(0,1);
/// Single Intercept for dichotomous variables
alpha_a ~ normal(0, sigma_a);
alpha_b ~ normal(0, sigma_b);
/// Varying Intercept Prior Declaration
for (j in 1:J) {
alpha_jj[j] ~ normal(0, sigma_jj);
}
for (n in 1:N_c){
alpha_c[n] ~ normal(0, sigma_c);
}
for (n in 1:N_d){
alpha_d[n] ~ normal(0, sigma_d);
}
for (n in 1:N_e){
alpha_e[n] ~ normal(0, sigma_e);
}
for (n in 1:N_f){
alpha_f[n] ~ normal(0, sigma_f);
}
for (n in 1:N) {
y[n] ~ categorical_logit(alpha_zero +[alpha_a', -alpha_a'][a[n]]' + [alpha_b', -alpha_b'][b[n]]' + alpha_c[c[n]] + alpha_d[d[n]] + alpha_e[e[n]] + alpha_f[f[n]] + alpha_jj[jj[n]] + x_b[n]);
}
}