Brms rejecting values

I am attempting to run an IRT model in Stan without success. I used the brms code:

model <- brms::bf(
  RT  | dec(score) ~ rotation  + (1 |p| person) + (1 |i| item),
  bs ~ rotation  + (1 |p| person) + (1 |i| item),
  ndt ~ rotation  + (1 |p| person) + (1 |i| item),
  bias = 0.5)


chains <- 1
inits_drift <- list(temp_ndt_Intercept = -3)
inits_drift <- replicate(chains, inits_drift, simplify = FALSE)



testfit <- brms::brm(model, data = standata,
                     family = brmsfamily("wiener", 
                                         "log", link_bs = "log", 
                                         link_ndt = "log"), 
                     chains = chains, cores = chains,
                     inits = inits_drift, init_r = 0.05,
                     control = list(adapt_delta = 0.99)
)

This produced the following error:

Chain 1: Rejecting initial value:
Chain 1:   Error evaluating the log probability at the initial value.
Chain 1: Exception: Exception: wiener_lpdf: Random variable  = 1.86276, but must be greater than nondecision time = 1.90094  (in 'model35f65d13a0_2840328e466b0de273afff2d35d72450' at line 17)
  (in 'model35f65d13a0_2840328e466b0de273afff2d35d72450' at line 159)

Chain 1: Rejecting initial value:
Chain 1:   Error evaluating the log probability at the initial value.
Chain 1: Exception: Exception: wiener_lpdf: Random variable  = 1.01074, but must be greater than nondecision time = 1.02514  (in 'model35f65d13a0_2840328e466b0de273afff2d35d72450' at line 19)
  (in 'model35f65d13a0_2840328e466b0de273afff2d35d72450' at line 159)

Chain 1: 
Chain 1: Initialization between (-0.05, 0.05) failed after 100 attempts. 
Chain 1:  Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] "Error in sampler$call_sampler(args_list[[i]]) : Initialization failed."                                                                      
[2] "In addition: Warning messages:"                                                                                                              
[3] "1: In system2(CXX, args = ARGS) : error in running command"                                                                                  
[4] "2: In file.remove(c(unprocessed, processed)) :"                                                                                              
[5] "  cannot remove file '/var/folders/9_/y248j5rj1rb82kfhskt4pm680000gn/T//RtmpN37SIT/file35f7ca8029e.stan', reason 'No such file or directory'"
error occurred during calling the sampler; sampling not done

Next I tried to use the make stan code function:

// generated with brms 2.13.3
functions {
  /* Wiener diffusion log-PDF for a single response
   * Args: 
   *   y: reaction time data
   *   dec: decision data (0 or 1)
   *   alpha: boundary separation parameter > 0
   *   tau: non-decision time parameter > 0
   *   beta: initial bias parameter in [0, 1]
   *   delta: drift rate parameter
   * Returns:  
   *   a scalar to be added to the log posterior 
   */ 
   real wiener_diffusion_lpdf(real y, int dec, real alpha, 
                              real tau, real beta, real delta) { 
     if (dec == 1) {
       return wiener_lpdf(y | alpha, tau, beta, delta);
     } else {
       return wiener_lpdf(y | alpha, tau, 1 - beta, - delta);
     }
   }
}
data {
  int<lower=1> N;  // number of observations
  vector[N] Y;  // response variable
  int<lower=0,upper=1> dec[N];  // decisions
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> K_bs;  // number of population-level effects
  matrix[N, K_bs] X_bs;  // population-level design matrix
  int<lower=1> K_ndt;  // number of population-level effects
  matrix[N, K_ndt] X_ndt;  // population-level design matrix
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  int<lower=1> J_1[N];  // grouping indicator per observation
  // group-level predictor values
  vector[N] Z_1_1;
  vector[N] Z_1_bs_2;
  vector[N] Z_1_ndt_3;
  int<lower=1> NC_1;  // number of group-level correlations
  // data for group-level effects of ID 2
  int<lower=1> N_2;  // number of grouping levels
  int<lower=1> M_2;  // number of coefficients per level
  int<lower=1> J_2[N];  // grouping indicator per observation
  // group-level predictor values
  vector[N] Z_2_1;
  vector[N] Z_2_bs_2;
  vector[N] Z_2_ndt_3;
  int<lower=1> NC_2;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  real min_Y = min(Y);
  int Kc = K - 1;
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  int Kc_bs = K_bs - 1;
  matrix[N, Kc_bs] Xc_bs;  // centered version of X_bs without an intercept
  vector[Kc_bs] means_X_bs;  // column means of X_bs before centering
  int Kc_ndt = K_ndt - 1;
  matrix[N, Kc_ndt] Xc_ndt;  // centered version of X_ndt without an intercept
  vector[Kc_ndt] means_X_ndt;  // column means of X_ndt before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
  for (i in 2:K_bs) {
    means_X_bs[i - 1] = mean(X_bs[, i]);
    Xc_bs[, i - 1] = X_bs[, i] - means_X_bs[i - 1];
  }
  for (i in 2:K_ndt) {
    means_X_ndt[i - 1] = mean(X_ndt[, i]);
    Xc_ndt[, i - 1] = X_ndt[, i] - means_X_ndt[i - 1];
  }
}
parameters {
  vector[Kc] b;  // population-level effects
  real Intercept;  // temporary intercept for centered predictors
  vector[Kc_bs] b_bs;  // population-level effects
  real Intercept_bs;  // temporary intercept for centered predictors
  vector[Kc_ndt] b_ndt;  // population-level effects
  real Intercept_ndt;  // temporary intercept for centered predictors
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  matrix[M_1, N_1] z_1;  // standardized group-level effects
  cholesky_factor_corr[M_1] L_1;  // cholesky factor of correlation matrix
  vector<lower=0>[M_2] sd_2;  // group-level standard deviations
  matrix[M_2, N_2] z_2;  // standardized group-level effects
  cholesky_factor_corr[M_2] L_2;  // cholesky factor of correlation matrix
}
transformed parameters {
  real<lower=0,upper=1> bias = 0.5;  // initial bias parameter
  matrix[N_1, M_1] r_1;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_1] r_1_1;
  vector[N_1] r_1_bs_2;
  vector[N_1] r_1_ndt_3;
  matrix[N_2, M_2] r_2;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_2] r_2_1;
  vector[N_2] r_2_bs_2;
  vector[N_2] r_2_ndt_3;
  // compute actual group-level effects
  r_1 = (diag_pre_multiply(sd_1, L_1) * z_1)';
  r_1_1 = r_1[, 1];
  r_1_bs_2 = r_1[, 2];
  r_1_ndt_3 = r_1[, 3];
  // compute actual group-level effects
  r_2 = (diag_pre_multiply(sd_2, L_2) * z_2)';
  r_2_1 = r_2[, 1];
  r_2_bs_2 = r_2[, 2];
  r_2_ndt_3 = r_2[, 3];
}
model {
  // initialize linear predictor term
  vector[N] mu = Intercept + Xc * b;
  // initialize linear predictor term
  vector[N] bs = Intercept_bs + Xc_bs * b_bs;
  // initialize linear predictor term
  vector[N] ndt = Intercept_ndt + Xc_ndt * b_ndt;
  for (n in 1:N) {
    // add more terms to the linear predictor
    mu[n] += r_1_1[J_1[n]] * Z_1_1[n] + r_2_1[J_2[n]] * Z_2_1[n];
  }
  for (n in 1:N) {
    // add more terms to the linear predictor
    bs[n] += r_1_bs_2[J_1[n]] * Z_1_bs_2[n] + r_2_bs_2[J_2[n]] * Z_2_bs_2[n];
  }
  for (n in 1:N) {
    // add more terms to the linear predictor
    ndt[n] += r_1_ndt_3[J_1[n]] * Z_1_ndt_3[n] + r_2_ndt_3[J_2[n]] * Z_2_ndt_3[n];
  }
  for (n in 1:N) {
    // apply the inverse link function
    bs[n] = exp(bs[n]);
  }
  for (n in 1:N) {
    // apply the inverse link function
    ndt[n] = exp(ndt[n]);
  }
  for (n in 1:N) {
    // apply the inverse link function
    mu[n] = exp(mu[n]);
  }
  // priors including all constants
  target += normal_lpdf(b | 0, 5);
  target += student_t_lpdf(Intercept | 3, 1.4, 2.5);
  target += normal_lpdf(Intercept_bs | -0.6, 1.3);
  target += student_t_lpdf(sd_1 | 3, 0, 2.5)
    - 3 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_1));
  target += lkj_corr_cholesky_lpdf(L_1 | 1);
  target += student_t_lpdf(sd_2 | 3, 0, 2.5)
    - 3 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_2));
  target += lkj_corr_cholesky_lpdf(L_2 | 1);
  // likelihood including all constants
  if (!prior_only) {
    for (n in 1:N) {
      target += wiener_diffusion_lpdf(Y[n] | dec[n], bs[n], ndt[n], bias, mu[n]);
    }
  }
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
  // actual population-level intercept
  real b_bs_Intercept = Intercept_bs - dot_product(means_X_bs, b_bs);
  // actual population-level intercept
  real b_ndt_Intercept = Intercept_ndt - dot_product(means_X_ndt, b_ndt);
  // compute group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // compute group-level correlations
  corr_matrix[M_2] Cor_2 = multiply_lower_tri_self_transpose(L_2);
  vector<lower=-1,upper=1>[NC_2] cor_2;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
  // extract upper diagonal of correlation matrix
  for (k in 1:M_2) {
    for (j in 1:(k - 1)) {
      cor_2[choose(k - 1, 2) + j] = Cor_2[j, k];
    }
  }
}

This also produces the rejection error message.

Have you tried to run the example IRT model in brms and Stan first? https://arxiv.org/abs/1905.09501 If that runs fine then we can dive into the code.

Can you post your version of R, RStudio, brms, and rstan?

thanks

1 Like

Yes I have successfully ran the example IRT model in Stan. Using brms is new but I thought the make_stan_code function could resolve the issues I have been having writing my own code.

Versions:
Rstudio 1.3.959
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.4

rstan_2.21.1
brms 2.13.3

I would also like to note that my dataset does not use ‘rotation’ but I included this in my first code

RT  | dec(score) ~ rotation  + (1 |p| person) + (1 |i| item)

because I was trying to change as little as possible but I assume this can be replaced with a 1 for an intercept?

1 Like

It was determined the dataset was the issue, as you hinted. I appreciate your help!!

1 Like