Hi all,
we have programmed a multidimensional graded IRT which we would like to extent by including group-level predictors. The model relies on the QR-based sum-to-zero constraint outline here for the expectations of the multivariate normal prior on the (person-specific) latent variable Theta. What we would like to add is having the expectations of the multivariate normal prior for Theta depend on person-level predictors, i.e. an additional group-level model, where respondents denote groups.
Below is the Stan program without the group-level model. This works fine.
functions {
// Sum-to-zero constraint based on QR decomposition
// source: https://discourse.mc-stan.org/t/test-soft-vs-hard-sum-to-zero-constrain-choosing-the-right-prior-for-soft-constrain/3884/31
vector Q_sum_to_zero_QR(int N) {
vector [2*N] Q_r;
for(i in 1:N) {
Q_r[i] = -sqrt((N-i)/(N-i+1.0));
Q_r[i+N] = inv_sqrt((N-i) * (N-i+1));
}
return Q_r;
}
vector sum_to_zero_QR(vector x_raw, vector Q_r) {
int N = num_elements(x_raw) + 1;
vector [N] x;
real x_aux = 0;
for(i in 1:N-1){
x[i] = x_aux + x_raw[i] * Q_r[i];
x_aux = x_aux + x_raw[i] * Q_r[i+N];
}
x[N] = x_aux;
return x;
}
}
data {
int<lower=1> I; // Number of items
int<lower=1> J; // Number of respondents
int<lower=1> L; // Number of items by country
int<lower=1> N; // Total responses
int<lower=1> D; // Dimensions
int<lower=1> K; // Number of respondent-level predictors
array[N] int<lower=1, upper=I> ii; // Item ID
array[N] int<lower=1, upper=J> jj; // Respondent ID
array[N] int<lower=1, upper=L> ll; // Item by country ID
array[N] int<lower=1, upper=D> dd; // Dimension ID
array[N] int<lower=0> y; // Responses
}
transformed data {
array[N] int r; // Modified response; r = 1, 2, ... m_i + 1
array[I] int m; // Number of item-specific intercepts (difficulty)
array[L] int p; // Number of item by country-specific intercepts (item bias/nonequivalence)
array[I] int pos_alpha; // First position in item intercepts
array[I] int pos_delta_alpha; // First position in between step change for item intercepts
array[L] int pos_zeta; // First position in item by country intercepts
array[L] int pos_delta_zeta; // First position in between step change for item by country intercepts
vector[2*J] Q_r;
real mu_raw_sigma;
m = rep_array(0, I);
p = rep_array(0, L);
for (n in 1 : N) {
r[n] = y[n] + 1;
if (y[n] > m[ii[n]])
m[ii[n]] = y[n];
}
for (n in 1 : N) {
if (y[n] > p[ll[n]])
p[ll[n]] = y[n];
}
pos_alpha[1] = 1;
pos_delta_alpha[1] = 1;
for (i in 2 : (I)) {
pos_alpha[i] = pos_alpha[i - 1] + m[i - 1];
pos_delta_alpha[i] = pos_delta_alpha[i - 1] + m[i - 1] - 1;
}
pos_zeta[1] = 1;
pos_delta_zeta[1] = 1;
for (i in 2 : (L)) {
pos_zeta[i] = pos_zeta[i - 1] + p[i - 1];
pos_delta_zeta[i] = pos_delta_zeta[i - 1] + p[i - 1] - 1;
}
// QR Decomposition
Q_r = Q_sum_to_zero_QR(J); // For sum-to-zero constraint
mu_raw_sigma = inv_sqrt(1 - inv(J));
}
parameters {
vector[I] eta; // First intercept parameter for each item
vector[L] kappa; // First intercept parameter for each item by country
vector<lower=0>[sum(m) - I] delta_alpha; // Between-step changes in item intercepts
vector<lower=0>[sum(p) - L] delta_zeta; // Between-step changes in item by country intercepts
array[I] real<lower=0> beta; // Item slopes (discrimination)
cholesky_factor_corr[D] L_Omega; // Cholesky factor of correlation matrix (off-diagonal elements)
vector<lower=0, upper=pi()/2>[D] tau_unif; // Scale factor (diagonal elements)
matrix[D, J] Theta_raw; // Multivariate normal
matrix[J-1, D] Mu_raw; // Sum-to-zero
real<lower=0> sigma_eta;
real<lower=0> sigma_kappa;
real<lower=0> sigma_delta_alpha;
real<lower=0> sigma_delta_zeta;
real<lower=0> sigma_beta;
}
transformed parameters {
vector[sum(m)] alpha; // Composite item intercepts
vector[sum(p)] zeta; // Composite item by country intercepts
vector<lower=0>[D] tau; // Reparameterized scale factor
matrix[D, D] L_Sigma; // Cholesky factor of covariance matrix
matrix[D, J] Theta; // Multidimensional latent variable (ability)
matrix[J, D] Mu; // Expectations
for (i in 1 : I) {
if (m[i] > 0)
alpha[pos_alpha[i]] = eta[i];
for (k in 2 : m[i])
alpha[pos_alpha[i] + k - 1] = alpha[pos_alpha[i] + k - 2]
+ delta_alpha[pos_delta_alpha[i] + k - 2];
}
for (i in 1 : L) {
if (p[i] > 0)
zeta[pos_zeta[i]] = kappa[i];
for (k in 2 : p[i])
zeta[pos_zeta[i] + k - 1] = zeta[pos_zeta[i] + k - 2]
+ delta_zeta[pos_delta_zeta[i] + k - 2];
}
for (d in 1:D) {
Mu[,d] = sum_to_zero_QR(Mu_raw[,d], Q_r); // Sum-to-zero constraint
}
tau = 2.5 * tan(tau_unif); // Prior on scale factor
L_Sigma = diag_pre_multiply(tau, L_Omega); // Decomposed cholesky factor of covariance matrix (scale factor and cholesky factor of correlation matrix)
Theta = Mu' + L_Sigma * Theta_raw; // Multivariate respondent-level model (non-centered parameterization)
}
model {
vector[D] theta_j;
// Priors
eta ~ normal(0, sigma_eta);
kappa ~ normal(0, sigma_kappa);
delta_alpha ~ normal(0, sigma_delta_alpha);
delta_zeta ~ normal(0, sigma_delta_zeta);
beta ~ lognormal(1, sigma_beta);
L_Omega ~ lkj_corr_cholesky(2);
for (d in 1:D) {
Mu_raw[,d] ~ normal(0, mu_raw_sigma);
}
to_vector(Theta_raw) ~ std_normal();
sigma_eta ~ normal(0, 1);
sigma_kappa ~ normal(0, 1);
sigma_delta_alpha ~ normal(0, 1);
sigma_delta_zeta ~ normal(0, 1);
sigma_beta ~ normal(0, 1);
// Likelihood
for (n in 1:N) {
theta_j = Theta[, jj[n]];
r[n] ~ ordered_logistic(theta_j[dd[n]] * beta[ii[n]],
segment(alpha, pos_alpha[ii[n]], m[ii[n]])
+ segment(zeta, pos_zeta[ll[n]], p[ll[n]]));
}
}
Our initial thought was to include the group-level model by adding the following components to the program:
data {
matrix[J, K] Z; // Design matrix of respondent-level covariates
// ...
}
parameter {
matrix[K,D] Gamma; // Regression coefficients
// ...
}
model {
for (d in 1:D) {
Mu_raw[,d] ~ normal(Z * Gamma[,d], mu_raw_sigma);
}
// ...
}
However, this does not work as Mu_raw has dimensions [J-1,D] and Z * Gamma has dimensions [J, D]. So we tried the following instead.
transformed parameters {
for (d in 1:D) {
Mu[,d] = sum_to_zero_QR(Mu_raw[,d], Q_r) + Z * Gamma[,d]; // Sum-to-zero constraint and regression
}
// ...
}
But here the coefficients Gamma all go to zero. We also tried putting the sum-to-zero-constraint directly on Gamma, omitting Mu and have:
transformed parameters {
for (d in 1:D) {
Gamma[,d] = sum_to_zero_QR(Gamma_raw[,d], Q_r_gamma);
}
// ...
Theta = (Z * Gamma)' + L_Sigma * Theta_raw;
// ...
}
But then neither Theta nor Gamma converge.
We are grateful for any suggestions on how to add the group-level predictors here.