I am trying to learn working directly in Stan by iteratively building out a hierarchical model.
I was able to get what look like reasonable results through non-centered group-level specification as discussed in this thread.
I am now trying to add in an additional covariate with group-level varying slopes for that covariate.
The following model runs, but with a lot of divergences and some NA R-hats:
data {
int<lower=1> N; // total number of observations
vector[N] y ; // response variable
int<lower=1> N_Grp; // number of groups
int<lower=1> Grp[N]; // Group indicator
int<lower=1> N_Cols; //
matrix[N, N_Cols] X1;
matrix[N, N_Cols] X2;
vector[N] X3;
vector[N] X4; //
}
parameters {
real beta1;
vector[N_Grp] beta2; //
vector[N_Grp] mu_grp; // standardized group means
real<lower=0> kappa; // precision parameter
real a;
real b;
vector<lower=0>[2] sd_grp; // group-level standard deviation
corr_matrix[2] Rho; //correlation matrix
}
transformed parameters {
vector[N_Grp] grp_int;
grp_int = (sd_grp[2] * (mu_grp)); // Group intercepts
}
model {
vector[N] mu;
// varying slopes and varying intercepts component
{
// Vector for intercept mean and slope mean
vector[2] YY[N_Grp];
vector[2] MU;
MU = [a, b]';
for (j in 1:N_Grp) YY[j] = [mu_grp[j], beta2[j]]';
YY ~ multi_normal(MU, quad_form_diag(Rho, sd_grp));
}
// Linear predictor
for (n in 1:N) {
mu[n] = grp_int[Grp[n]] + ((sum(row(X1, n) .* row(X2, n)) / X3[n]) * beta1 + beta2[Grp[n]] * X4[n]);
}
// likelihood
y ~ beta(inv_logit(mu) * kappa, (1 - inv_logit(mu)) * kappa);
// priors
beta1 ~ normal(10,10);
beta2 ~ normal(0,5);
kappa ~ gamma(0.01, 0.01);
sd_grp ~ student_t(3, 0, 2.5);
mu_grp ~ std_normal();
Rho ~ lkj_corr(1);
}
generated quantities {
vector[N] log_lik;
vector[N] y_pred;
vector[N] mu_pred;
for (n in 1:N) {
mu_pred[n] = grp_int[Grp[n]] + ((sum(row(X1, n) .* row(X2, n)) / X3[n]) * beta1 + beta2[Grp[n]] * X4[n]);
}
for (n in 1:N) {
y_pred[n] = beta_rng(inv_logit(mu_pred[n]) * kappa, (1 - inv_logit(mu_pred[n])) * kappa);
log_lik[n] = beta_lpdf(y_pred[n] | inv_logit(mu_pred[n]) * kappa, (1 - inv_logit(mu_pred[n])) * kappa);
}
}
Is there anything obviously incorrect in here that I’m missing?