Hey all!
Extending work mentioned here, I’ve been developing a hierarchical SEM model where:
- Latent variable
z[i]
is fit to predict a common (Likert scale) survey itemy_z
for individuali
via ordinal regression - Latent variable
t[m,i]
is defined for individuali
and topicm
, where latent variablez[i]
is a predictor oft[m,i]
with varying slopedelta[m]
(this slope parameterdelta
varies by topic) - Cutpoints
c_n[n]
for responsesy_n
to survey itemn
(Likert scale) are fit via ordinal logistic regression oft_m
, wherem
is the topic corresponding to itemn
- Latent variables,
z[i]
,t[m_1,i]
, …t[M,i]
are modeled to have a covarying error term for individuali
I’m experiencing a fitting issue where, when I rerun my model under different randomization defaults,
- All my parameters “converge” in the sense of returning
R_hat = 1.0
, but - I recovery consistent values for all my parameters except those related to the covarying individual error term (parameters
tau
,L_Omega
, andtau_L_Omega
and downstream generated quantitiesOmega
andSigma
)
I’ve tried changing the prior for my scale parameter tau
from the recommended cauchy(0,2.5)
something less diffuse (cauchy(0,1)
, student_t(8,0,1)
, etc.) and when I do so I find that my covariance terms vary less from randomization default to randomization default, but remain quite noisy. I’m not sure whether:
- I may have mis-specified my coding of the non-centered, Cholesky-factored covarying error term
- My error term might be non-identifiable in this model as specified (I believe the term is identifiable since the associated parameters consistently return
R_hat
values of 1.0, but I recognize that I may be mis-interpreting this QC output) - I’m suffering from some third source of error that I haven’t considered
Appreciate any guidance that folks might have to impart here. Thanks in advance!
data {
// Metadata
int<lower=0> I; // Number of Interviews
int<lower=0> J; // Number of Item-Response Pairs for Issue Topic Items
int<lower=0> M; // Number of Topics
int<lower=0> N; // Number of Survey Items
int<lower=0> Q; // Number of Common Item Response Classes
int<lower=0> R; // Number of Issue Topic Item Response Classes
// Discrete Predictor Level Declarations
int<lower=0> N_a;
int<lower=0> N_b;
// PII-Survey Data
/// Individual Demography
array[I] int<lower=1,upper=N_a> a;
array[I] int<lower=1,upper=N_b> b;
/// Common Item Response Labels
array[I] int<lower=1,upper=Q> y_z;
// Issue Topic-Item Data
/// Metadata
array[N] int<lower=1,upper=M> m_n; // Item-Level Topic Index
/// Responses
array[J] int<lower=1,upper=I> i_j; // Item-Response Respondent ID
array[J] int<lower=1,upper=N> n_j; // Item-Response Item ID
array[J] int<lower=1,upper=R> y_n_j; // Item-Response Response Labels
}
parameters {
// Target Cutpoints
ordered[Q - 1] c_z; // Cutpoints for common item
array[N] ordered[R - 1] c_t; // Cutpoints for topic items
// Latent Hierarchical Model for Common Item
/// Hyperprior Uncertainties for Varying Intercepts
real<lower=0> sigma_alpha_a;
real<lower=0> sigma_alpha_b;
/// Modeled Effects
vector[N_a] alpha_a_raw;
vector[N_b] alpha_b_raw;
// Latent Hierarchical Model for Topic Items
/// Hyperprior Uncertainties for Varying Intercepts, Slopes
vector<lower=0>[M] sigma_beta_a;
vector<lower=0>[M] sigma_delta;
/// Modeled Effects
array[M] vector[N_a] beta_a_raw;
vector[M] delta_raw; // varying slope for effect of latent variable representing *common item* in predicting *topic items*
// Correlated Error Parameters
/// Hyperpriors
vector<lower=0>[M_K + 1] tau; // prior scale
cholesky_factor_corr[M_K + 1] L_Omega; // Cholesky-factored prior covariance
/// Centered Cholesky Factored Covariance Term
vector[M_K + 1] tau_L_Omega_raw;
}
transformed parameters{
// Latent Hierarchical Model for Common Item
/// Vector codings for multilevel intercepts
vector[N_a] alpha_a = alpha_a_raw * sigma_alpha_a;
vector[N_b] alpha_b = alpha_b_raw * sigma_alpha_b;
// Latent Hierarchical Model for Topic Items
/// Varying Intercepts, Slopes
array[M] vector[N_a] beta_a = beta_a_raw .* sigma_beta_a;
vector[M] delta = delta_raw .* sigma_delta;
// Cholesky Factored Covariance Term
vector[M_K + 1] tau_L_Omega = diag_pre_multiply(tau, L_Omega) * tau_L_Omega_raw;
}
model {
// Latent Hierarchical Model Parameters for Common Item
/// Prior Uncertainties
sigma_alpha_a ~ std_normal();
sigma_alpha_b ~ std_normal();
/// Varying Intercepts
alpha_a_raw ~ std_normal();
alpha_b_raw ~ std_normal();
// Latent Hierarchical Model Parameters for Topic Items
/// Hyperprior Uncertainties for Varying Intercepts, Slopes
sigma_beta_a ~ std_normal();
sigma_delta ~ std_normal();
/// Varying Intercepts, Slopes
for (m in 1:M){
beta_a_raw[m] ~ std_normal();
}
delta_raw ~ std_normal();
// Latent Topic Correlation
tau ~ cauchy(0,2);
L_Omega ~ lkj_corr_cholesky(2);
tau_L_Omega_raw ~ std_normal();
// Declare Local Scope for Latent Variable Vectorization
{
// Declare Latent Core Item Variable z, Topic Items variables t_m
vector[I] z = alpha_a[a] + alpha_b[b] + tau_L_Omega[1];
array[M] vector[I] t_m_i;
// Vectorize Latent Variables for Core Item, Topics
for (m in 1:M){
t_m_i[m,] = beta_a[m,a] + delta[m] * z + tau_L_Omega[m + 1];
}
// Reindex Latent Topic Variables from individuals to item-reponse pairs
vector[J] t_j = t_m_i[m_n[n_j],i_j];
// Fit responses to Core Item, Topic Items
y_z ~ ordered_logistic(z, c_z);
y_n_j ~ ordered_logistic(t_j, c_n[n_j]);
}
}
generated quantities{
matrix[M_K + 1, M_K + 1] Omega = multiply_lower_tri_self_transpose(L_Omega);
matrix[M_K + 1, M_K + 1] Sigma = quad_form_diag(Omega, tau);
}
Note: code has been edited for clarity from initial submission