I’m still getting used to Stan’s syntax and I wanted to make sure that I’m doing this correctly.
I’m working with a hierarchical regression in which I model the contribution of latent factors (\vec{\mu}_a, \vec{\mu}_b, etc.) to participants’ responses. I want to sample a J \times K matrix of weights whose rows \vec{\beta}_j are drawn from a multivariate normal given a K-dimensional vector of means \vec{\mu}_j and a covariance matrix \Sigma, i.e. \vec{\beta}_j \sim \mathcal{N}(\vec{\mu}_j, \Sigma), where \vec{\mu}_j is a weighted sum of the latent factors. Is splitting the variables up like this the right way to do that (simplified snippet below, full model after it)? The model seems to run, but some of the credible intervals are much wider than I expected from visual inspection, so I wanted to check that the model is doing what I think it is. Thanks a ton for the help (feedback on any part of the model is super welcome).
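In math, what I’m trying to express is the following (writing \vec{\sigma} for the group-level standard deviations and L for the Cholesky factor of the correlation matrix, matching sd_beta and L in the code below):

\vec{\mu}_j = a_j \vec{\mu}_a + b_j \vec{\mu}_b + c_j \vec{\mu}_c, \qquad \vec{\beta}_j \sim \mathcal{N}(\vec{\mu}_j, \Sigma), \qquad \Sigma = \mathrm{diag}(\vec{\sigma}) \, L L^{\top} \mathrm{diag}(\vec{\sigma})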
parameters {
vector[K] mu_a;
vector[K] mu_b;
vector[K] mu_c;
matrix[K, J] z;
vector<lower=0>[K] sd_beta; // group-level standard deviations (avoiding "sd", a built-in function name)
cholesky_factor_corr[K] L;
}
transformed parameters {
matrix[J, K] beta = (diag_pre_multiply(sd_beta, L) * z)'; // non-centered parameterization: rows of beta are the weight vectors
}
model {
vector[K] mu[J];
for (j in 1:J) {
mu[j] = a[j] * mu_a + b[j] * mu_b + c[j] * mu_c;
// a[j], b[j], c[j] are scalar data; mu_a, mu_b, mu_c are K-d vectors
}
for (j in 1:J) {
target += normal_lpdf(col(z, j) | mu[j], 1); // column j of z has mean vector mu[j]
}
}
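For comparison, here is a minimal sketch of the centered parameterization I have in mind (a, b, c are data; data declarations omitted); my hope is that the non-centered code above is equivalent to this:

parameters {
vector[K] mu_a;
vector[K] mu_b;
vector[K] mu_c;
vector[K] beta[J]; // sampled directly instead of built from z
vector<lower=0>[K] sd_beta;
cholesky_factor_corr[K] L;
}
model {
for (j in 1:J) {
// Sigma = diag(sd_beta) * L * L' * diag(sd_beta)
target += multi_normal_cholesky_lpdf(beta[j] |
a[j] * mu_a + b[j] * mu_b + c[j] * mu_c,
diag_pre_multiply(sd_beta, L));
}
}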
Full model:
data {
int<lower=1> N; // number of observations
int<lower=2> ncat; // number of categories
int<lower=1,upper=ncat> Y[N]; // response variable (ordinal, 1..ncat)
int<lower=0,upper=1> instructions[N]; // dummy-coded condition variable
// data for stimuli
int<lower=1> stimuli[N]; // stimulus index for each observation
int<lower=1> n_stimuli; // number of stimuli
int<lower=1> M_1; // number of group-level coefficients (stimuli)
int<lower=1> NC_1; // number of group-level correlations (stimuli)
// data for participants
int<lower=1> participants[N]; // participant index for each observation
int<lower=1> n_participants; // number of participants
int<lower=0,upper=1> diagnosis_bysubject[n_participants]; // dummy-coded diagnosis
real working_memory_errors_zscore_bysubject[n_participants];
int<lower=1> M_2; // number of group-level coefficients (participants)
int<lower=1> NC_2; // number of group-level correlations (participants)
}
transformed data {
// zero vectors used to initialize the local means in the model block
vector[M_2] mu_participant_blank[n_participants];
vector[N] mu_response_blank = rep_vector(0, N);
for (j in 1:n_participants) {
mu_participant_blank[j] = rep_vector(0, M_2);
}
}
parameters {
ordered[ncat-1] temp_Intercept; // temporary thresholds
vector[M_2] mu_p; // <mu_p0, mu_pr> values
vector[M_2] mu_pd; // <mu_pd0, mu_pdr> values
vector[M_2] mu_pw; // <mu_pw0, mu_pwr> values
vector[M_2] mu_pdw; // <mu_pdw0, mu_pdwr> values
matrix[M_1, n_stimuli] z_1; // unscaled group-level effects
vector<lower=0>[M_1] sd_1; // group-level standard deviations
cholesky_factor_corr[M_1] L_1; // cholesky factor of correlation matrix
matrix[M_2, n_participants] z_2; // unscaled group-level effects
vector<lower=0>[M_2] sd_2; // group-level standard deviations
cholesky_factor_corr[M_2] L_2; // cholesky factor of correlation matrix
}
transformed parameters {
// group-level effects 1 -- stimuli
matrix[n_stimuli, M_1] beta_x = (diag_pre_multiply(sd_1, L_1) * z_1)'; // (n_stimuli x M_1) matrix of beta weights; here M_1 = 2
vector[n_stimuli] beta_x0 = beta_x[, 1]; // rand intercepts (n_stimuli x 1)
vector[n_stimuli] beta_xr = beta_x[, 2]; // rand slopes (n_stimuli x 1)
// group-level effects 2 -- participants
matrix[n_participants, M_2] beta_p = (diag_pre_multiply(sd_2, L_2) * z_2)'; // (n_participants x M_2) matrix of beta weights; here M_2 = 2
vector[n_participants] beta_p0 = beta_p[, 1]; // rand intercepts (n_participants x 1)
vector[n_participants] beta_pr = beta_p[, 2]; // rand slopes (n_participants x 1)
}
model {
vector[M_2] mu_participant[n_participants] = mu_participant_blank; // per-participant mean vectors <mu_p0, mu_pr> for the columns of z_2
vector[N] mu_response = mu_response_blank;
////////////////////// update beta_x (stimuli)
target += normal_lpdf(to_vector(z_1) | 0, 1);
target += student_t_lpdf(sd_1 | 3, 0, 10)
- 2 * student_t_lccdf(0 | 3, 0, 10);
target += lkj_corr_cholesky_lpdf(L_1 | 1);
////////////////////// update beta_p (participants)
for (j in 1:n_participants) {
mu_participant[j] =
mu_p +
diagnosis_bysubject[j] * mu_pd +
working_memory_errors_zscore_bysubject[j] * mu_pw +
(diagnosis_bysubject[j] * working_memory_errors_zscore_bysubject[j]) * mu_pdw;
}
for (j in 1:n_participants) {
target += normal_lpdf(col(z_2, j) | mu_participant[j], 1);
}
target += student_t_lpdf(sd_2 | 3, 0, 10)
- 2 * student_t_lccdf(0 | 3, 0, 10);
target += lkj_corr_cholesky_lpdf(L_2 | 1);
//////////////////////
for (n in 1:N) {
mu_response[n] =
beta_x0[stimuli[n]] + // stimulus intercept
beta_xr[stimuli[n]] * instructions[n] + // stimulus slope
beta_p0[participants[n]] + // participant intercept
beta_pr[participants[n]] * instructions[n]; // participant slope
}
target += student_t_lpdf(temp_Intercept | 3, 0, 10);
// likelihood including all constants
for (n in 1:N) {
target += ordered_logistic_lpmf(Y[n] | mu_response[n], temp_Intercept);
}
}
generated quantities {
corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
vector<lower=-1,upper=1>[NC_1] cor_1;
corr_matrix[M_2] Cor_2 = multiply_lower_tri_self_transpose(L_2);
vector<lower=-1,upper=1>[NC_2] cor_2;
// take only the relevant parts of the correlation matrices
cor_1[1] = Cor_1[1,2];
cor_2[1] = Cor_2[1,2];
}
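P.S. To make my worry concrete: if I’ve done the algebra right, the non-centered construction above implies

\vec{\beta}_j = \mathrm{diag}(\vec{\sigma}) \, L \, \vec{z}_j, \quad \vec{z}_j \sim \mathcal{N}(\vec{\mu}_j, I) \qquad \Rightarrow \qquad \vec{\beta}_j \sim \mathcal{N}(\mathrm{diag}(\vec{\sigma}) \, L \, \vec{\mu}_j, \; \Sigma),

so I’d like to confirm whether the means of the weight vectors come out as \vec{\mu}_j (what I intend) or as \mathrm{diag}(\vec{\sigma}) \, L \, \vec{\mu}_j.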