Yes, I wrote the model in Stan and managed to replicate the results from the paper mentioned above. I don’t think that it is currently possible to build this type of model with brms.
However, it takes quite long to run so I tried to implement the reduce_sum
parallelization, but I can’t get the model to compile.
Here is the model without reduce_sum
, data is attached. I used the model posted by @danielcfurr here as basis and the sum-to-zero constraint mentioned here. Running this on Windows 10 (x64, i7-10510, 16GB RAM) took roughly 40 minutes. Even though there are plenty of parameters to be estimated here, this seems somewhat long. I guess there is some potential for making this run faster by rewriting the model. I’d appreciate any suggestions here!
stan_data.Rdata (6.3 KB)
library(cmdstanr)
load("./stan_data.Rdata")
gIRT <- "// GRADED RESPONSE MODEL
data {
int<lower=1> I; // Number of items
int<lower=1> J; // Number of respondents
int<lower=1> N; // Total responses
int<lower=1,upper=I> ii[N]; // Item ID
int<lower=1,upper=J> jj[N]; // Person ID
int<lower=0> y[N]; // Responses
int<lower=1> K; // Number of covariates for location (mu) of ability (theta)
matrix[J,K] X; // Covariate matrix (including intercept) for location (mu) of ability (theta)
}
transformed data {
int r[N]; // modified response; r = 1, 2, ... m_i + 1
int m[I]; // number of difficulty parameters per item
int pos_alpha[I]; // first position in difficulty for item
int pos_delta[I]; // first position in between-step change for item
// Allowing for differing numbers of response categories
m = rep_array(0, I);
for(n in 1:N) {
r[n] = y[n] + 1;
if(y[n] > m[ii[n]]) m[ii[n]] = y[n];
}
pos_alpha[1] = 1;
pos_delta[1] = 1;
for(i in 2:(I)) {
pos_alpha[i] = pos_alpha[i-1] + m[i-1];
pos_delta[i] = pos_delta[i-1] + m[i-1] - 1;
}
}
parameters {
vector[I] eta; // First diffulty parameter for each item
vector<lower=0>[sum(m)-I] delta; // Between-step changes in difficulty
real<lower = 0> beta[I]; // Constrain all discrimination parameters to be positive
vector[J] theta; // Ability parameter
vector[J-1] mu_free; // Sum-to-zero constraint for location (mu) of ability (theta)
vector[K] gamma; // Covariate coefficients
}
transformed parameters {
vector[J] mu; // location of ability (theta)
vector[sum(m)] alpha; // Composite item difficulties
for(j in 1:(J-1)) mu[j] = mu_free[j]; // Sum-to-zero constraint for location (mu) of ability (theta)
mu[J] = -sum(mu_free);
for(i in 1:I) {
if (m[i] > 0)
alpha[pos_alpha[i]] = eta[i];
for (k in 2:m[i])
alpha[pos_alpha[i]+k-1] = alpha[pos_alpha[i]+k-2] + delta[pos_delta[i]+k-2];
}
}
model{
eta ~ normal(0,1);
delta ~ normal(0,1);
beta ~ lognormal(1,1);
gamma ~ student_t(3, 0, 1);
target += normal_lpdf(alpha | 0, 1);
target += normal_lpdf(mu | X*gamma, 1); // Mean regression
theta ~ normal(mu, 1);
for (n in 1:N) {
r[n] ~ ordered_logistic(theta[jj[n]] * beta[ii[n]],
segment(alpha, pos_alpha[ii[n]], m[ii[n]]));
}
}
"
write(gIRT, "gIRT.stan")
compiled_gIRT <- cmdstan_model(stan_file = "gIRT.stan")
fit_gIRT <- compiled_gIRT$sample(
data = stan_data,
chains = 4,
parallel_chains = 4,
iter_warmup = 500,
iter_sampling = 100,
refresh = 50
)
And here is my attempt at implementing reduce_sum
:
gIRTrs <- "// GRADED RESPONSE MODEL
functions {
real partial_sum_lpmf(int[] slice_r,
int start, int end,
int[] ii,
int[] jj,
vector theta,
real beta,
vector alpha,
int[] m,
int[] pos_alpha) {
return ordered_logistic_lupmf(slice_r |
theta[jj[start:end]] * beta[ii[start:end]],
segment(alpha, pos_alpha[ii[start:end]],
m[ii[start:end]]));
}
}
data {
int<lower=1> I; // Number of items
int<lower=1> J; // Number of respondents
int<lower=1> N; // Total responses
int<lower=1,upper=I> ii[N]; // Item ID
int<lower=1,upper=J> jj[N]; // Person ID
int<lower=0> y[N]; // Responses
int<lower=1> K; // Number of covariates for location (mu) of ability (theta)
matrix[J,K] X; // Covariate matrix (including intercept) for location (mu) of ability (theta)
int<lower=1> grainsize;
}
transformed data {
int r[N]; // modified response; r = 1, 2, ... m_i + 1
int m[I]; // number of difficulty parameters per item
int pos_alpha[I]; // first position in difficulty for item
int pos_delta[I]; // first position in between-step change for item
// Allowing for differing numbers of response categories
m = rep_array(0, I);
for(n in 1:N) {
r[n] = y[n] + 1;
if(y[n] > m[ii[n]]) m[ii[n]] = y[n];
}
pos_alpha[1] = 1;
pos_delta[1] = 1;
for(i in 2:(I)) {
pos_alpha[i] = pos_alpha[i-1] + m[i-1];
pos_delta[i] = pos_delta[i-1] + m[i-1] - 1;
}
}
parameters {
vector[I] eta; // First diffulty parameter for each item
vector<lower=0>[sum(m)-I] delta; // Between-step changes in difficulty
real<lower = 0> beta[I]; // Constrain all discrimination parameters to be positive
vector[J] theta; // Ability parameter
vector[J-1] mu_free; // Sum-to-zero constraint for location (mu) of ability (theta)
vector[K] gamma; // Covariate coefficients
}
transformed parameters {
vector[J] mu; // location of ability (theta)
vector[sum(m)] alpha; // Composite item difficulties
for(j in 1:(J-1)) mu[j] = mu_free[j]; // Sum-to-zero constraint for location (mu) of ability (theta)
mu[J] = -sum(mu_free);
for(i in 1:I) {
if (m[i] > 0)
alpha[pos_alpha[i]] = eta[i];
for (k in 2:m[i])
alpha[pos_alpha[i]+k-1] = alpha[pos_alpha[i]+k-2] + delta[pos_delta[i]+k-2];
}
}
model{
eta ~ normal(0,1);
delta ~ normal(0,1);
beta ~ lognormal(1,1);
gamma ~ student_t(3, 0, 1);
target += normal_lpdf(alpha | 0, 1);
target += normal_lpdf(mu | X*gamma, 1); // Mean regression
theta ~ normal(mu, 1);
target += reduce_sum(partial_sum_lupmf, r, grainsize,
ii, jj, theta, beta, alpha, m, pos_alpha);
}
"
write(gIRTrs, "gIRTrs.stan")
compiled_gIRTrs <- cmdstan_model("gIRTrs.stan", cpp_options = list(stan_threads = TRUE))
This throws the following error, which I can not resolve:
Semantic error
line 14, column 57 to column 76:
-------------------------------------------------
12: int[] pos_alpha) {
13: return ordered_logistic_lupmf(slice_r |
14: theta[jj[start:end]] * beta[ii[start:end]],
^
15: segment(alpha, pos_alpha[ii[start:end]],
16: m[ii[start:end]]));
-------------------------------------------------
Too many indexes, expression dimensions=0, indexes found=1
Thanks!