Hi all,
just wanted to report a difference in runtime between the current cmdstan version 2.28.2 and version 2.27.0 using within-chain parallelization via reduce_sum (4 cores, parallel_chains = 2, threads_per_chain = 2).
The R code below first uses Stan to simulate data. Then, when running the model for parameter recovery, runtime is roughly 12 minutes with version 2.27.0 and more than 25 minutes with the current version 2.28.2. With the current version Chain 2 finishes when Chain 1 is only at iteration 100/600. With version 2.27.0, the chains’ progress is roughly parallel.
Should this be expected, e.g., because the scheduler assigns threads in an inefficient way (but then Chain 2 should not take that long), or is this a bug in v2.28.2 that should be reported at github.com/stan-dev/cmdstan/issues?
Using Windows 10 x64, i7-10510 (4 cores), 16GB RAM
library(cmdstanr)
library(posterior)
library(magrittr)
library(dplyr)
library(purrr)
# change version
cmdstanr::set_cmdstan_path("./.cmdstan/cmdstan-2.27.0") # or "./.cmdstan/cmdstan-2.28.2"
# set common data for simulation --------------------------------------------------------
set.seed(10364)
I <- 10 # items
J <- 1000 # responses
m <- sample(x = 2:10, size = I, replace = TRUE)-1 # response categories for items
x_2 <- rnorm(J, 10, 5) # covariate 1
x_3 <- rbinom(J, 3, .65) # covariate 2
X <- cbind(1, x_2, x_3, x_2*x_3) # covariate matrix (incl. intercept and interactions)
#### GRADED RESPONSE MODEL ==============================================================
# stan model to simulate response -------------------------------------------------------
sim_grm <- "// GRADED RESPONSE MODEL
data {
int<lower=1> I; // Number of items
int<lower=1> J; // Number of respondents
int<lower=1> N; // Total responses
int<lower=1> m[I]; // Number of difficulty parameters per item
int<lower=1,upper=I> ii[N]; // Item ID
int<lower=1,upper=J> jj[N]; // Person ID
}
transformed data {
int pos_alpha[I]; // First position in difficulty for item
int pos_delta[I]; // First position in between-step change for item
pos_alpha[1] = 1;
pos_delta[1] = 1;
for(i in 2:(I)) {
pos_alpha[i] = pos_alpha[i-1] + m[i-1];
pos_delta[i] = pos_delta[i-1] + m[i-1] - 1;
}
}
parameters {
vector[I] eta; // First diffulty parameter for each item
vector<lower=0>[sum(m)-I] delta; // Between-step changes in difficulty
real<lower = 0> beta[I]; // Discrimination parameters - monotonicity constraint
vector[J] theta; // Ability parameter
}
transformed parameters {
vector[sum(m)] alpha; // Composite item difficulty parameters
for(i in 1:I) {
if (m[i] > 0)
alpha[pos_alpha[i]] = eta[i];
for (k in 2:m[i])
alpha[pos_alpha[i]+k-1] = alpha[pos_alpha[i]+k-2] + delta[pos_delta[i]+k-2];
}
}
model{
// Priors
eta ~ normal(0,2);
delta ~ normal(0,2);
beta ~ lognormal(1,1);
theta ~ normal(0,1);
}
generated quantities {
vector[N] y;
// Simulation
for (n in 1:N) {
y[n] = ordered_logistic_rng(theta[jj[n]] * beta[ii[n]],
segment(alpha, pos_alpha[ii[n]], m[ii[n]]));
}
}
"
write(sim_grm, "sim_grm.stan")
# compile model -------------------------------------------------------------------------
sim_grm <- cmdstan_model(stan_file = "sim_grm.stan")
# prepare data for simulation -----------------------------------------------------------
data_sim_grm <- list(I = I,
J = J,
N = I*J,
ii = rep(1:I, times = J),
jj = rep(1:J, each = I),
m = m)
# simulate response ---------------------------------------------------------------------
fit_sim_grm <- sim_grm$sample(
data = data_sim_grm,
chains = 4,
parallel_chains = 4,
iter_warmup = 1000,
iter_sampling = 1000,
refresh = 100)
# pick a draw (must cover all m) --------------------------------------------------------
set.seed(123)
draw <- sample(1:4000, 1)
# assign simulated response and true parameters from random draw ------------------------
draws_sim_grm <- fit_sim_grm$draws(c("y", "alpha", "eta", "delta", "beta", "theta")) %>%
as_draws_df()
y_sim <- draws_sim_grm %>%
dplyr::select(starts_with("y")) %>%
slice(draw) %>%
as.integer() %>%
subtract(1)
alpha_true <- draws_sim_grm %>%
dplyr::select(starts_with("alpha")) %>%
slice(draw) %>%
as.numeric()
eta_true <- draws_sim_grm %>%
dplyr::select(starts_with("eta")) %>%
slice(draw) %>%
as.numeric()
delta_true <- draws_sim_grm %>%
dplyr::select(starts_with("delta")) %>%
slice(draw) %>%
as.numeric()
beta_true <- draws_sim_grm %>%
dplyr::select(starts_with("beta")) %>%
slice(draw) %>%
as.numeric()
theta_true <- draws_sim_grm %>%
dplyr::select(starts_with("theta")) %>%
slice(draw) %>%
as.numeric()
# stan model to check parameter recovery ------------------------------------------------
grm <- "// GRADED RESPONSE MODEL
functions {
// Within-chain parallelization
real partial_sum_lpmf(int[] r_slice,
int start, int end,
int[] ii,
int[] jj,
vector theta,
real[] beta,
vector alpha,
int[] m,
int[] pos_alpha) {
real sum = 0.0;
for(i in start:end) {
sum += ordered_logistic_lupmf(r_slice[i - start + 1] | theta[jj[i]] * beta[ii[i]],
segment(alpha, pos_alpha[ii[i]],
m[ii[i]]));
}
return sum;
}
}
data {
int<lower=1> I; // Number of items
int<lower=1> J; // Number of respondents
int<lower=1> N; // Total responses
int<lower=1,upper=I> ii[N]; // Item ID
int<lower=1,upper=J> jj[N]; // Person ID
int<lower=0> y[N]; // Responses
int<lower=1> grainsize;
}
transformed data {
int r[N]; // Modified response; r = 1, 2, ... m_i + 1
int m[I]; // Number of difficulty parameters per item
int pos_alpha[I]; // First position in difficulty for item
int pos_delta[I]; // First position in between-step change for item
m = rep_array(0, I);
for(n in 1:N) {
r[n] = y[n] + 1;
if(y[n] > m[ii[n]]) m[ii[n]] = y[n];
}
pos_alpha[1] = 1;
pos_delta[1] = 1;
for(i in 2:(I)) {
pos_alpha[i] = pos_alpha[i-1] + m[i-1];
pos_delta[i] = pos_delta[i-1] + m[i-1] - 1;
}
}
parameters {
vector[I] eta; // First diffulty parameter for each item
vector<lower=0>[sum(m)-I] delta; // Between-step changes in difficulty
real<lower = 0> beta[I]; // Discrimination parameters - monotonicity constraint
vector[J] theta; // Ability parameter
}
transformed parameters {
vector[sum(m)] alpha; // Composite item difficulty parameters
for(i in 1:I) {
if (m[i] > 0)
alpha[pos_alpha[i]] = eta[i];
for (k in 2:m[i])
alpha[pos_alpha[i]+k-1] = alpha[pos_alpha[i]+k-2] + delta[pos_delta[i]+k-2];
}
}
model{
// Priors
eta ~ normal(0,2);
delta ~ normal(0,2);
beta ~ lognormal(1,1);
theta ~ normal(0,1);
// Likelihood
target += reduce_sum(partial_sum_lupmf, r, grainsize,
ii, jj, theta, beta, alpha, m, pos_alpha);
}
"
write(grm, "grm.stan")
# compile model -------------------------------------------------------------------------
grm <- cmdstan_model("grm.stan", cpp_options = list(stan_threads = TRUE))
# prepare data for parameter recovery ---------------------------------------------------
data_grm <- data_sim_grm %>%
list_modify(y = y_sim,
grainsize = 1)
# recover parameters --------------------------------------------------------------------
start_time <- Sys.time()
fit_grm <- grm$sample(
data = data_grm,
chains = 2,
parallel_chains = 2,
threads_per_chain = 2,
iter_warmup = 500,
iter_sampling = 100,
refresh = 50
)
end_time <- Sys.time()
end_time - start_time