"y is not positive definite" error with categorical but not monotonic effects

  • Operating System: Windows 10
  • brms Version: 2.13.0

I have a multivariate mixed effects logistic regression model that includes monotonic effects for several six-category ordinal predictors. This model converges well and looks fine.

# Good
fit <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + Gender + mo(x1) + mo(x2) + mo(x3) + mo(x4) +
    (1 + mo(x1) + mo(x2) + mo(x3) + mo(x4) | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y3")
  ),
  family = bernoulli,
  data = dat,
  chains = 8,
  cores = 8
)

However, I want to compare this model (via loo_compare()) to one in which the monotonic assumption is relaxed. So I recoded the variables as unordered factors and refit the model without monotonic effects.
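The planned comparison would be something like this once both models have sampled (just a sketch; the object names are illustrative):

# Planned comparison (sketch): only possible once both models sample successfully
loo_mo  <- loo(fit)       # monotonic-effects model
loo_cat <- loo(fit_cat)   # unordered-factor model
loo_compare(loo_mo, loo_cat)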

# Error
dat_cat <- dat %>% 
  mutate_at(vars(x1, x2, x3, x4), factor, ordered = FALSE)
fit_cat <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + Gender + x1 + x2 + x3 + x4 +
    (1 + x1 + x2 + x3 + x4 | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3")
  ),
  family = bernoulli,
  data = dat_cat,
  chains = 8,
  cores = 8
)

This model, however, returns the following error:

Exception: validate transformed params: y is not positive definite.

Does this have to do with the sparsity of some categories, or did I mess up my priors somehow?
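For reference, a quick way to see how thin some of the cells are (an illustrative check, not model output):

# Count observations per category in each predictor
lapply(dat_cat[c("x1", "x2", "x3", "x4")], table)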

Thanks in advance for any advice.

Hey, do you have a minimal reproducible example for me?

Here is an example with simulated data:

library(brms)

# Simulate data without relationships and ignoring clustering

n <- 750

y1 <- rbinom(n, size = 1, prob = 0.354)
y2 <- rbinom(n, size = 1, prob = 0.313)
y3 <- rbinom(n, size = 1, prob = 0.241)

x1 <- sample(0:5, size = n, replace = TRUE, prob = c(0.535, 0.065, 0.089, 0.146, 0.132, 0.032))
x2 <- sample(0:5, size = n, replace = TRUE, prob = c(0.675, 0.028, 0.049, 0.076, 0.099, 0.073))
x3 <- sample(0:5, size = n, replace = TRUE, prob = c(0.767, 0.043, 0.057, 0.055, 0.057, 0.021))
x4 <- sample(0:5, size = n, replace = TRUE, prob = c(0.879, 0.003, 0.005, 0.011, 0.045, 0.057))

Subject <- sample(1:124, size = n, replace = TRUE)

dat <- data.frame(Subject, y1, y2, y3)
dat$x1 <- factor(x1, levels = 0:5, ordered = TRUE)
dat$x2 <- factor(x2, levels = 0:5, ordered = TRUE)
dat$x3 <- factor(x3, levels = 0:5, ordered = TRUE)
dat$x4 <- factor(x4, levels = 0:5, ordered = TRUE)

dat_cat <- data.frame(Subject, y1, y2, y3)
dat_cat$x1 <- factor(x1, levels = 0:5, ordered = FALSE)
dat_cat$x2 <- factor(x2, levels = 0:5, ordered = FALSE)
dat_cat$x3 <- factor(x3, levels = 0:5, ordered = FALSE)
dat_cat$x4 <- factor(x4, levels = 0:5, ordered = FALSE)

# Estimate model with monotonic effects

fit <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + mo(x1) + mo(x2) + mo(x3) + mo(x4) +
    (1 + mo(x1) + mo(x2) + mo(x3) + mo(x4) | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y3")
  ),
  family = bernoulli,
  data = dat,
  chains = 8,
  cores = 8
)

summary(fit)

# Estimate model without monotonic effects

fit_cat <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + x1 + x2 + x3 + x4 +
    (1 + x1 + x2 + x3 + x4 | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3")
  ),
  family = bernoulli,
  data = dat_cat,
  chains = 8,
  cores = 8
)

summary(fit_cat)

Error information:

some chains had errors; consider specifying chains = 1 to debug
here are whatever error messages were returned
[[1]]
Stan model '79de594461777ed9d95b8f80bc5e5f56' does not contain samples.

[1] "Chain 8: Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:"                        
[2] "Chain 8: Exception: multiply: A[31] is nan, but must not be nan!  (in 'modeld3ec174e56d7_79de594461777ed9d95b8f80bc5e5f56' at line 235)"        
[3] "Chain 8: If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,"
[4] "Chain 8: but if this warning occurs often then your model may be either severely ill-conditioned or misspecified."                              
[5] "Chain 8: "                                                                                                                                      
[6] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                                               
[7] "  Exception: validate transformed params: y is not positive definite.  (in 'modeld3ec174e56d7_79de594461777ed9d95b8f80bc5e5f56' at line 334)"   
[1] "error occurred during calling the sampler; sampling not done"

I was also able to recreate the problem with the above code using n <- 300, so it will be even faster to reproduce.

[1] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                                            
[2] "  Exception: validate transformed params: y is not positive definite.  (in 'modeld3ec57b413db_172b7b54ceee58ad24651e32f3e3bfa2' at line 331)"
[1] "error occurred during calling the sampler; sampling not done"

This is strange: when I run the first part of your example (n <- 300, chains = 4, cores = 4), it bails at the first model every now and then,

Error: Invalid Dirichlet prior for the simplex of coefficient 'mox41'. Expected input of length 3.

but if I rerun the code (generating new data), it starts to compile and sample (using brms 2.13.0), so the data generation seems to be shaky?

In short, with your example above, once the invalid Dirichlet problem does not occur, it compiles and samples without a problem.

I cannot reproduce your problem unfortunately. Can you please fix the random seed for data generation so that I can be sure to get a problematic data set?

It sounds like your simulated data did not include all six categories (0-5) for x4, so the Dirichlet prior was misspecified. With my seed (and in the real data), however, all categories are present (albeit some rarely). I am working on finding a random seed that recreates my problem now.
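A quick check along these lines (illustrative only) confirms whether every level actually shows up in a simulated draw:

# Does each simulated predictor contain all six categories (0-5)?
sapply(list(x1 = x1, x2 = x2, x3 = x3, x4 = x4), function(x) all(0:5 %in% x))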

OK, this reproduces the problem, at least on my computer. Note that with this smaller model (n = 300, chains = 4) and this seed, I get a divergent-transition warning for the monotonic model, but it otherwise converges, whereas the categorical model does not.

library(brms)
set.seed(2020+06+13)

# Simulate data without relationships

n <- 300

y1 <- rbinom(n, size = 1, prob = 0.354)
y2 <- rbinom(n, size = 1, prob = 0.313)
y3 <- rbinom(n, size = 1, prob = 0.241)

x1 <- sample(0:5, size = n, replace = TRUE, prob = c(0.535, 0.065, 0.089, 0.146, 0.132, 0.032))
x2 <- sample(0:5, size = n, replace = TRUE, prob = c(0.675, 0.028, 0.049, 0.076, 0.099, 0.073))
x3 <- sample(0:5, size = n, replace = TRUE, prob = c(0.767, 0.043, 0.057, 0.055, 0.057, 0.021))
x4 <- sample(0:5, size = n, replace = TRUE, prob = c(0.879, 0.003, 0.005, 0.011, 0.045, 0.057))

Subject <- sample(1:124, size = n, replace = TRUE)

dat <- data.frame(Subject, y1, y2, y3)
dat$x1 <- factor(x1, levels = 0:5, ordered = TRUE)
dat$x2 <- factor(x2, levels = 0:5, ordered = TRUE)
dat$x3 <- factor(x3, levels = 0:5, ordered = TRUE)
dat$x4 <- factor(x4, levels = 0:5, ordered = TRUE)

dat_cat <- data.frame(Subject, y1, y2, y3)
dat_cat$x1 <- factor(x1, levels = 0:5, ordered = FALSE)
dat_cat$x2 <- factor(x2, levels = 0:5, ordered = FALSE)
dat_cat$x3 <- factor(x3, levels = 0:5, ordered = FALSE)
dat_cat$x4 <- factor(x4, levels = 0:5, ordered = FALSE)

# Estimate model with monotonic effects

fit <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + mo(x1) + mo(x2) + mo(x3) + mo(x4) +
    (1 + mo(x1) + mo(x2) + mo(x3) + mo(x4) | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y1"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y2"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox11", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox21", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox31", resp = "y3"),
    set_prior("dirichlet(1, 1, 1, 1, 1)", class = "simo", coef = "mox41", resp = "y3")
  ),
  family = bernoulli,
  data = dat,
  chains = 4,
  cores = 4,
  seed = 2020+06+13
)

summary(fit)

# Estimate model without monotonic effects

fit_cat <- brm(
  formula = 
    mvbind(y1, y2, y3) ~
    1 + x1 + x2 + x3 + x4 +
    (1 + x1 + x2 + x3 + x4 | Subject),
  prior = c(
    set_prior("lkj(1)", class = "cor"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y1"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y2"),
    set_prior("student_t(3, 0, 1)", class = "Intercept", resp = "y3"),
    set_prior("normal(0, 1)", class = "b", resp = "y1"),
    set_prior("normal(0, 1)", class = "b", resp = "y2"),
    set_prior("normal(0, 1)", class = "b", resp = "y3"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y1"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y2"),
    set_prior("student_t(3, 0, 2.5)", class = "sd", resp = "y3")
  ),
  family = bernoulli,
  data = dat_cat,
  chains = 4,
  cores = 4,
  seed = 2020+06+13
)

summary(fit_cat)

Errors from the second model:

Chain 4: 
[1] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                                            
[2] "  Exception: validate transformed params: y is not positive definite.  (in 'model2aa832f96990_79de594461777ed9d95b8f80bc5e5f56' at line 340)"
[1] "error occurred during calling the sampler; sampling not done"

Chain 1: 
[1] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                                            
[2] "  Exception: validate transformed params: y is not positive definite.  (in 'model2aa832f96990_79de594461777ed9d95b8f80bc5e5f56' at line 337)"
[1] "error occurred during calling the sampler; sampling not done"

I am sorry, but I still cannot reproduce the error. Have you tried using the latest dev version of brms? Also, which rstan and StanHeaders versions are you using?

Same issue with the dev version from GitHub:

R version 4.0.1 (2020-06-06)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] brms_2.13.3  Rcpp_1.0.4.6

loaded via a namespace (and not attached):
  [1] nlme_3.1-148         matrixStats_0.56.0   fs_1.4.1             xts_0.12-0           usethis_1.6.1        devtools_2.3.0       threejs_0.3.3       
  [8] rprojroot_1.3-2      rstan_2.19.3         tools_4.0.1          backports_1.1.7      R6_2.4.1             DT_0.13              colorspace_1.4-1    
 [15] withr_2.2.0          tidyselect_1.1.0     gridExtra_2.3        prettyunits_1.1.1    processx_3.4.2       Brobdingnag_1.2-6    emmeans_1.4.7       
 [22] curl_4.3             compiler_4.0.1       cli_2.0.2            shinyjs_1.1          desc_1.2.0           colourpicker_1.0     scales_1.1.1        
 [29] dygraphs_1.1.1.6     mvtnorm_1.1-1        ggridges_0.5.2       callr_3.4.3          StanHeaders_2.21.0-5 stringr_1.4.0        digest_0.6.25       
 [36] base64enc_0.1-3      pkgconfig_2.0.3      htmltools_0.4.0      sessioninfo_1.1.1    fastmap_1.0.1        htmlwidgets_1.5.1    rlang_0.4.6         
 [43] rstudioapi_0.11      shiny_1.4.0.2        generics_0.0.2       zoo_1.8-8            crosstalk_1.1.0.1    gtools_3.8.2         dplyr_1.0.0         
 [50] inline_0.3.15        magrittr_1.5         loo_2.2.0            bayesplot_1.7.2      Matrix_1.2-18        munsell_0.5.0        fansi_0.4.1         
 [57] abind_1.4-5          lifecycle_0.2.0      stringi_1.4.6        pkgbuild_1.0.8       plyr_1.8.6           grid_4.0.1           parallel_4.0.1      
 [64] promises_1.1.1       crayon_1.3.4         miniUI_0.1.1.1       lattice_0.20-41      knitr_1.28           ps_1.3.3             pillar_1.4.4        
 [71] igraph_1.2.5         estimability_1.3     markdown_1.1         shinystan_2.5.0      codetools_0.2-16     reshape2_1.4.4       stats4_4.0.1        
 [78] pkgload_1.1.0        rstantools_2.1.0     glue_1.4.1           packrat_0.5.0        RcppParallel_5.0.1   remotes_2.1.1        vctrs_0.3.1         
 [85] httpuv_1.5.4         testthat_2.3.2       gtable_0.3.0         purrr_0.3.4          assertthat_0.2.1     ggplot2_3.3.1        xfun_0.14           
 [92] mime_0.9             xtable_1.8-4         coda_0.19-3          later_1.1.0.1        rsconnect_0.8.16     tibble_3.0.1         shinythemes_1.1.2   
 [99] memoise_1.1.0        ellipsis_0.3.1       bridgesampling_1.0-0

Hi Jeffrey,

I can actually reproduce this now,

[1] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                                            
[2] "  Exception: validate transformed params: y is not positive definite.  (in 'modele48d6c379e1e_837bd99b61df504c000f37901d23e4d0' at line 340)"

and the same error at line 337. Hmm, now I need to find out where that model is…

I use OS X 10.15.4 and,

> packageVersion("rstan")
[1] ‘2.19.3’
> packageVersion("StanHeaders")
[1] ‘2.21.0.5’
> packageVersion("brms")
[1] ‘2.13.0’

I also tried it with the latest dev version of brms and it gives the same error. Let's see what @paul.buerkner has to say.

I cannot reproduce it, so I am a little lost. Is y even a variable in transformed parameters?

Paul, the error pops up when the chains are finishing the sampling. Here’s the code that brms generates,

// generated with brms 2.13.3
functions {
}
data {
  int<lower=1> N;  // number of observations
  int<lower=1> N_y1;  // number of observations
  int Y_y1[N_y1];  // response variable
  int<lower=1> K_y1;  // number of population-level effects
  matrix[N_y1, K_y1] X_y1;  // population-level design matrix
  int<lower=1> N_y2;  // number of observations
  int Y_y2[N_y2];  // response variable
  int<lower=1> K_y2;  // number of population-level effects
  matrix[N_y2, K_y2] X_y2;  // population-level design matrix
  int<lower=1> N_y3;  // number of observations
  int Y_y3[N_y3];  // response variable
  int<lower=1> K_y3;  // number of population-level effects
  matrix[N_y3, K_y3] X_y3;  // population-level design matrix
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  int<lower=1> J_1_y1[N_y1];  // grouping indicator per observation
  // group-level predictor values
  vector[N_y1] Z_1_y1_1;
  vector[N_y1] Z_1_y1_2;
  vector[N_y1] Z_1_y1_3;
  vector[N_y1] Z_1_y1_4;
  vector[N_y1] Z_1_y1_5;
  vector[N_y1] Z_1_y1_6;
  vector[N_y1] Z_1_y1_7;
  vector[N_y1] Z_1_y1_8;
  vector[N_y1] Z_1_y1_9;
  vector[N_y1] Z_1_y1_10;
  vector[N_y1] Z_1_y1_11;
  vector[N_y1] Z_1_y1_12;
  vector[N_y1] Z_1_y1_13;
  vector[N_y1] Z_1_y1_14;
  vector[N_y1] Z_1_y1_15;
  vector[N_y1] Z_1_y1_16;
  vector[N_y1] Z_1_y1_17;
  vector[N_y1] Z_1_y1_18;
  vector[N_y1] Z_1_y1_19;
  vector[N_y1] Z_1_y1_20;
  vector[N_y1] Z_1_y1_21;
  int<lower=1> NC_1;  // number of group-level correlations
  // data for group-level effects of ID 2
  int<lower=1> N_2;  // number of grouping levels
  int<lower=1> M_2;  // number of coefficients per level
  int<lower=1> J_2_y2[N_y2];  // grouping indicator per observation
  // group-level predictor values
  vector[N_y2] Z_2_y2_1;
  vector[N_y2] Z_2_y2_2;
  vector[N_y2] Z_2_y2_3;
  vector[N_y2] Z_2_y2_4;
  vector[N_y2] Z_2_y2_5;
  vector[N_y2] Z_2_y2_6;
  vector[N_y2] Z_2_y2_7;
  vector[N_y2] Z_2_y2_8;
  vector[N_y2] Z_2_y2_9;
  vector[N_y2] Z_2_y2_10;
  vector[N_y2] Z_2_y2_11;
  vector[N_y2] Z_2_y2_12;
  vector[N_y2] Z_2_y2_13;
  vector[N_y2] Z_2_y2_14;
  vector[N_y2] Z_2_y2_15;
  vector[N_y2] Z_2_y2_16;
  vector[N_y2] Z_2_y2_17;
  vector[N_y2] Z_2_y2_18;
  vector[N_y2] Z_2_y2_19;
  vector[N_y2] Z_2_y2_20;
  vector[N_y2] Z_2_y2_21;
  int<lower=1> NC_2;  // number of group-level correlations
  // data for group-level effects of ID 3
  int<lower=1> N_3;  // number of grouping levels
  int<lower=1> M_3;  // number of coefficients per level
  int<lower=1> J_3_y3[N_y3];  // grouping indicator per observation
  // group-level predictor values
  vector[N_y3] Z_3_y3_1;
  vector[N_y3] Z_3_y3_2;
  vector[N_y3] Z_3_y3_3;
  vector[N_y3] Z_3_y3_4;
  vector[N_y3] Z_3_y3_5;
  vector[N_y3] Z_3_y3_6;
  vector[N_y3] Z_3_y3_7;
  vector[N_y3] Z_3_y3_8;
  vector[N_y3] Z_3_y3_9;
  vector[N_y3] Z_3_y3_10;
  vector[N_y3] Z_3_y3_11;
  vector[N_y3] Z_3_y3_12;
  vector[N_y3] Z_3_y3_13;
  vector[N_y3] Z_3_y3_14;
  vector[N_y3] Z_3_y3_15;
  vector[N_y3] Z_3_y3_16;
  vector[N_y3] Z_3_y3_17;
  vector[N_y3] Z_3_y3_18;
  vector[N_y3] Z_3_y3_19;
  vector[N_y3] Z_3_y3_20;
  vector[N_y3] Z_3_y3_21;
  int<lower=1> NC_3;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc_y1 = K_y1 - 1;
  matrix[N_y1, Kc_y1] Xc_y1;  // centered version of X_y1 without an intercept
  vector[Kc_y1] means_X_y1;  // column means of X_y1 before centering
  int Kc_y2 = K_y2 - 1;
  matrix[N_y2, Kc_y2] Xc_y2;  // centered version of X_y2 without an intercept
  vector[Kc_y2] means_X_y2;  // column means of X_y2 before centering
  int Kc_y3 = K_y3 - 1;
  matrix[N_y3, Kc_y3] Xc_y3;  // centered version of X_y3 without an intercept
  vector[Kc_y3] means_X_y3;  // column means of X_y3 before centering
  for (i in 2:K_y1) {
    means_X_y1[i - 1] = mean(X_y1[, i]);
    Xc_y1[, i - 1] = X_y1[, i] - means_X_y1[i - 1];
  }
  for (i in 2:K_y2) {
    means_X_y2[i - 1] = mean(X_y2[, i]);
    Xc_y2[, i - 1] = X_y2[, i] - means_X_y2[i - 1];
  }
  for (i in 2:K_y3) {
    means_X_y3[i - 1] = mean(X_y3[, i]);
    Xc_y3[, i - 1] = X_y3[, i] - means_X_y3[i - 1];
  }
}
parameters {
  vector[Kc_y1] b_y1;  // population-level effects
  real Intercept_y1;  // temporary intercept for centered predictors
  vector[Kc_y2] b_y2;  // population-level effects
  real Intercept_y2;  // temporary intercept for centered predictors
  vector[Kc_y3] b_y3;  // population-level effects
  real Intercept_y3;  // temporary intercept for centered predictors
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  matrix[M_1, N_1] z_1;  // standardized group-level effects
  cholesky_factor_corr[M_1] L_1;  // cholesky factor of correlation matrix
  vector<lower=0>[M_2] sd_2;  // group-level standard deviations
  matrix[M_2, N_2] z_2;  // standardized group-level effects
  cholesky_factor_corr[M_2] L_2;  // cholesky factor of correlation matrix
  vector<lower=0>[M_3] sd_3;  // group-level standard deviations
  matrix[M_3, N_3] z_3;  // standardized group-level effects
  cholesky_factor_corr[M_3] L_3;  // cholesky factor of correlation matrix
}
transformed parameters {
  matrix[N_1, M_1] r_1;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_1] r_1_y1_1;
  vector[N_1] r_1_y1_2;
  vector[N_1] r_1_y1_3;
  vector[N_1] r_1_y1_4;
  vector[N_1] r_1_y1_5;
  vector[N_1] r_1_y1_6;
  vector[N_1] r_1_y1_7;
  vector[N_1] r_1_y1_8;
  vector[N_1] r_1_y1_9;
  vector[N_1] r_1_y1_10;
  vector[N_1] r_1_y1_11;
  vector[N_1] r_1_y1_12;
  vector[N_1] r_1_y1_13;
  vector[N_1] r_1_y1_14;
  vector[N_1] r_1_y1_15;
  vector[N_1] r_1_y1_16;
  vector[N_1] r_1_y1_17;
  vector[N_1] r_1_y1_18;
  vector[N_1] r_1_y1_19;
  vector[N_1] r_1_y1_20;
  vector[N_1] r_1_y1_21;
  matrix[N_2, M_2] r_2;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_2] r_2_y2_1;
  vector[N_2] r_2_y2_2;
  vector[N_2] r_2_y2_3;
  vector[N_2] r_2_y2_4;
  vector[N_2] r_2_y2_5;
  vector[N_2] r_2_y2_6;
  vector[N_2] r_2_y2_7;
  vector[N_2] r_2_y2_8;
  vector[N_2] r_2_y2_9;
  vector[N_2] r_2_y2_10;
  vector[N_2] r_2_y2_11;
  vector[N_2] r_2_y2_12;
  vector[N_2] r_2_y2_13;
  vector[N_2] r_2_y2_14;
  vector[N_2] r_2_y2_15;
  vector[N_2] r_2_y2_16;
  vector[N_2] r_2_y2_17;
  vector[N_2] r_2_y2_18;
  vector[N_2] r_2_y2_19;
  vector[N_2] r_2_y2_20;
  vector[N_2] r_2_y2_21;
  matrix[N_3, M_3] r_3;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_3] r_3_y3_1;
  vector[N_3] r_3_y3_2;
  vector[N_3] r_3_y3_3;
  vector[N_3] r_3_y3_4;
  vector[N_3] r_3_y3_5;
  vector[N_3] r_3_y3_6;
  vector[N_3] r_3_y3_7;
  vector[N_3] r_3_y3_8;
  vector[N_3] r_3_y3_9;
  vector[N_3] r_3_y3_10;
  vector[N_3] r_3_y3_11;
  vector[N_3] r_3_y3_12;
  vector[N_3] r_3_y3_13;
  vector[N_3] r_3_y3_14;
  vector[N_3] r_3_y3_15;
  vector[N_3] r_3_y3_16;
  vector[N_3] r_3_y3_17;
  vector[N_3] r_3_y3_18;
  vector[N_3] r_3_y3_19;
  vector[N_3] r_3_y3_20;
  vector[N_3] r_3_y3_21;
  // compute actual group-level effects
  r_1 = (diag_pre_multiply(sd_1, L_1) * z_1)';
  r_1_y1_1 = r_1[, 1];
  r_1_y1_2 = r_1[, 2];
  r_1_y1_3 = r_1[, 3];
  r_1_y1_4 = r_1[, 4];
  r_1_y1_5 = r_1[, 5];
  r_1_y1_6 = r_1[, 6];
  r_1_y1_7 = r_1[, 7];
  r_1_y1_8 = r_1[, 8];
  r_1_y1_9 = r_1[, 9];
  r_1_y1_10 = r_1[, 10];
  r_1_y1_11 = r_1[, 11];
  r_1_y1_12 = r_1[, 12];
  r_1_y1_13 = r_1[, 13];
  r_1_y1_14 = r_1[, 14];
  r_1_y1_15 = r_1[, 15];
  r_1_y1_16 = r_1[, 16];
  r_1_y1_17 = r_1[, 17];
  r_1_y1_18 = r_1[, 18];
  r_1_y1_19 = r_1[, 19];
  r_1_y1_20 = r_1[, 20];
  r_1_y1_21 = r_1[, 21];
  // compute actual group-level effects
  r_2 = (diag_pre_multiply(sd_2, L_2) * z_2)';
  r_2_y2_1 = r_2[, 1];
  r_2_y2_2 = r_2[, 2];
  r_2_y2_3 = r_2[, 3];
  r_2_y2_4 = r_2[, 4];
  r_2_y2_5 = r_2[, 5];
  r_2_y2_6 = r_2[, 6];
  r_2_y2_7 = r_2[, 7];
  r_2_y2_8 = r_2[, 8];
  r_2_y2_9 = r_2[, 9];
  r_2_y2_10 = r_2[, 10];
  r_2_y2_11 = r_2[, 11];
  r_2_y2_12 = r_2[, 12];
  r_2_y2_13 = r_2[, 13];
  r_2_y2_14 = r_2[, 14];
  r_2_y2_15 = r_2[, 15];
  r_2_y2_16 = r_2[, 16];
  r_2_y2_17 = r_2[, 17];
  r_2_y2_18 = r_2[, 18];
  r_2_y2_19 = r_2[, 19];
  r_2_y2_20 = r_2[, 20];
  r_2_y2_21 = r_2[, 21];
  // compute actual group-level effects
  r_3 = (diag_pre_multiply(sd_3, L_3) * z_3)';
  r_3_y3_1 = r_3[, 1];
  r_3_y3_2 = r_3[, 2];
  r_3_y3_3 = r_3[, 3];
  r_3_y3_4 = r_3[, 4];
  r_3_y3_5 = r_3[, 5];
  r_3_y3_6 = r_3[, 6];
  r_3_y3_7 = r_3[, 7];
  r_3_y3_8 = r_3[, 8];
  r_3_y3_9 = r_3[, 9];
  r_3_y3_10 = r_3[, 10];
  r_3_y3_11 = r_3[, 11];
  r_3_y3_12 = r_3[, 12];
  r_3_y3_13 = r_3[, 13];
  r_3_y3_14 = r_3[, 14];
  r_3_y3_15 = r_3[, 15];
  r_3_y3_16 = r_3[, 16];
  r_3_y3_17 = r_3[, 17];
  r_3_y3_18 = r_3[, 18];
  r_3_y3_19 = r_3[, 19];
  r_3_y3_20 = r_3[, 20];
  r_3_y3_21 = r_3[, 21];
}
model {
  // initialize linear predictor term
  vector[N_y1] mu_y1 = Intercept_y1 + Xc_y1 * b_y1;
  // initialize linear predictor term
  vector[N_y2] mu_y2 = Intercept_y2 + Xc_y2 * b_y2;
  // initialize linear predictor term
  vector[N_y3] mu_y3 = Intercept_y3 + Xc_y3 * b_y3;
  for (n in 1:N_y1) {
    // add more terms to the linear predictor
    mu_y1[n] += r_1_y1_1[J_1_y1[n]] * Z_1_y1_1[n] + r_1_y1_2[J_1_y1[n]] * Z_1_y1_2[n] + r_1_y1_3[J_1_y1[n]] * Z_1_y1_3[n] + r_1_y1_4[J_1_y1[n]] * Z_1_y1_4[n] + r_1_y1_5[J_1_y1[n]] * Z_1_y1_5[n] + r_1_y1_6[J_1_y1[n]] * Z_1_y1_6[n] + r_1_y1_7[J_1_y1[n]] * Z_1_y1_7[n] + r_1_y1_8[J_1_y1[n]] * Z_1_y1_8[n] + r_1_y1_9[J_1_y1[n]] * Z_1_y1_9[n] + r_1_y1_10[J_1_y1[n]] * Z_1_y1_10[n] + r_1_y1_11[J_1_y1[n]] * Z_1_y1_11[n] + r_1_y1_12[J_1_y1[n]] * Z_1_y1_12[n] + r_1_y1_13[J_1_y1[n]] * Z_1_y1_13[n] + r_1_y1_14[J_1_y1[n]] * Z_1_y1_14[n] + r_1_y1_15[J_1_y1[n]] * Z_1_y1_15[n] + r_1_y1_16[J_1_y1[n]] * Z_1_y1_16[n] + r_1_y1_17[J_1_y1[n]] * Z_1_y1_17[n] + r_1_y1_18[J_1_y1[n]] * Z_1_y1_18[n] + r_1_y1_19[J_1_y1[n]] * Z_1_y1_19[n] + r_1_y1_20[J_1_y1[n]] * Z_1_y1_20[n] + r_1_y1_21[J_1_y1[n]] * Z_1_y1_21[n];
  }
  for (n in 1:N_y2) {
    // add more terms to the linear predictor
    mu_y2[n] += r_2_y2_1[J_2_y2[n]] * Z_2_y2_1[n] + r_2_y2_2[J_2_y2[n]] * Z_2_y2_2[n] + r_2_y2_3[J_2_y2[n]] * Z_2_y2_3[n] + r_2_y2_4[J_2_y2[n]] * Z_2_y2_4[n] + r_2_y2_5[J_2_y2[n]] * Z_2_y2_5[n] + r_2_y2_6[J_2_y2[n]] * Z_2_y2_6[n] + r_2_y2_7[J_2_y2[n]] * Z_2_y2_7[n] + r_2_y2_8[J_2_y2[n]] * Z_2_y2_8[n] + r_2_y2_9[J_2_y2[n]] * Z_2_y2_9[n] + r_2_y2_10[J_2_y2[n]] * Z_2_y2_10[n] + r_2_y2_11[J_2_y2[n]] * Z_2_y2_11[n] + r_2_y2_12[J_2_y2[n]] * Z_2_y2_12[n] + r_2_y2_13[J_2_y2[n]] * Z_2_y2_13[n] + r_2_y2_14[J_2_y2[n]] * Z_2_y2_14[n] + r_2_y2_15[J_2_y2[n]] * Z_2_y2_15[n] + r_2_y2_16[J_2_y2[n]] * Z_2_y2_16[n] + r_2_y2_17[J_2_y2[n]] * Z_2_y2_17[n] + r_2_y2_18[J_2_y2[n]] * Z_2_y2_18[n] + r_2_y2_19[J_2_y2[n]] * Z_2_y2_19[n] + r_2_y2_20[J_2_y2[n]] * Z_2_y2_20[n] + r_2_y2_21[J_2_y2[n]] * Z_2_y2_21[n];
  }
  for (n in 1:N_y3) {
    // add more terms to the linear predictor
    mu_y3[n] += r_3_y3_1[J_3_y3[n]] * Z_3_y3_1[n] + r_3_y3_2[J_3_y3[n]] * Z_3_y3_2[n] + r_3_y3_3[J_3_y3[n]] * Z_3_y3_3[n] + r_3_y3_4[J_3_y3[n]] * Z_3_y3_4[n] + r_3_y3_5[J_3_y3[n]] * Z_3_y3_5[n] + r_3_y3_6[J_3_y3[n]] * Z_3_y3_6[n] + r_3_y3_7[J_3_y3[n]] * Z_3_y3_7[n] + r_3_y3_8[J_3_y3[n]] * Z_3_y3_8[n] + r_3_y3_9[J_3_y3[n]] * Z_3_y3_9[n] + r_3_y3_10[J_3_y3[n]] * Z_3_y3_10[n] + r_3_y3_11[J_3_y3[n]] * Z_3_y3_11[n] + r_3_y3_12[J_3_y3[n]] * Z_3_y3_12[n] + r_3_y3_13[J_3_y3[n]] * Z_3_y3_13[n] + r_3_y3_14[J_3_y3[n]] * Z_3_y3_14[n] + r_3_y3_15[J_3_y3[n]] * Z_3_y3_15[n] + r_3_y3_16[J_3_y3[n]] * Z_3_y3_16[n] + r_3_y3_17[J_3_y3[n]] * Z_3_y3_17[n] + r_3_y3_18[J_3_y3[n]] * Z_3_y3_18[n] + r_3_y3_19[J_3_y3[n]] * Z_3_y3_19[n] + r_3_y3_20[J_3_y3[n]] * Z_3_y3_20[n] + r_3_y3_21[J_3_y3[n]] * Z_3_y3_21[n];
  }
  // priors including all constants
  target += normal_lpdf(b_y1 | 0, 1);
  target += student_t_lpdf(Intercept_y1 | 3, 0, 1);
  target += normal_lpdf(b_y2 | 0, 1);
  target += student_t_lpdf(Intercept_y2 | 3, 0, 1);
  target += normal_lpdf(b_y3 | 0, 1);
  target += student_t_lpdf(Intercept_y3 | 3, 0, 1);
  target += student_t_lpdf(sd_1 | 3, 0, 2.5)
    - 21 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_1));
  target += lkj_corr_cholesky_lpdf(L_1 | 1);
  target += student_t_lpdf(sd_2 | 3, 0, 2.5)
    - 21 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_2));
  target += lkj_corr_cholesky_lpdf(L_2 | 1);
  target += student_t_lpdf(sd_3 | 3, 0, 2.5)
    - 21 * student_t_lccdf(0 | 3, 0, 2.5);
  target += std_normal_lpdf(to_vector(z_3));
  target += lkj_corr_cholesky_lpdf(L_3 | 1);
  // likelihood including all constants
  if (!prior_only) {
    target += bernoulli_logit_lpmf(Y_y1 | mu_y1);
    target += bernoulli_logit_lpmf(Y_y2 | mu_y2);
    target += bernoulli_logit_lpmf(Y_y3 | mu_y3);
  }
}
generated quantities {
  // actual population-level intercept
  real b_y1_Intercept = Intercept_y1 - dot_product(means_X_y1, b_y1);
  // actual population-level intercept
  real b_y2_Intercept = Intercept_y2 - dot_product(means_X_y2, b_y2);
  // actual population-level intercept
  real b_y3_Intercept = Intercept_y3 - dot_product(means_X_y3, b_y3);
  // compute group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // compute group-level correlations
  corr_matrix[M_2] Cor_2 = multiply_lower_tri_self_transpose(L_2);
  vector<lower=-1,upper=1>[NC_2] cor_2;
  // compute group-level correlations
  corr_matrix[M_3] Cor_3 = multiply_lower_tri_self_transpose(L_3);
  vector<lower=-1,upper=1>[NC_3] cor_3;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
  // extract upper diagonal of correlation matrix
  for (k in 1:M_2) {
    for (j in 1:(k - 1)) {
      cor_2[choose(k - 1, 2) + j] = Cor_2[j, k];
    }
  }
  // extract upper diagonal of correlation matrix
  for (k in 1:M_3) {
    for (j in 1:(k - 1)) {
      cor_3[choose(k - 1, 2) + j] = Cor_3[j, k];
    }
  }
}

I still don't see it, but it also seems to be Stan-seed dependent, as it apparently happens only for some chains. @bgoodri have you seen this error before?

Does the error still occur if you change multiply_lower_tri_self_transpose to tcrossprod?

If you declare the corr_matrix on lines 337 and 340 as matrix and look at the corresponding output, is it a floating point issue?

I've not used Stan much outside of brms. To make these changes, I guess I just replace all instances of multiply_lower_tri_self_transpose with tcrossprod in the Stan file that brms generates and then run the updated file through rstan?

Yes, you can just modify the Stan code you have directly and save it. Also, separately, try changing corr_matrix to matrix. corr_matrix has a check that may be too sensitive, and it may be possible to bypass that check. But inspect those matrices yourself to make sure they're just floating point issues.
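Putting those two suggestions together, a rough sketch of the workflow might look like this (priors omitted for brevity; the string edits are illustrative and not something verified in this thread):

library(brms)
library(rstan)

# Regenerate the Stan code and data that brm() would use for the categorical model
form <- bf(mvbind(y1, y2, y3) ~ 1 + x1 + x2 + x3 + x4 +
             (1 + x1 + x2 + x3 + x4 | Subject))
scode <- make_stancode(form, data = dat_cat, family = bernoulli)
sdata <- make_standata(form, data = dat_cat, family = bernoulli)

# Apply the suggested edits to the generated code:
# 1) multiply_lower_tri_self_transpose() -> tcrossprod()
scode <- gsub("multiply_lower_tri_self_transpose(", "tcrossprod(", scode, fixed = TRUE)
# 2) corr_matrix[M_k] -> matrix[M_k, M_k] to bypass the correlation-matrix validation
scode <- gsub("corr_matrix\\[(M_[0-9]+)\\]", "matrix[\\1, \\1]", scode)

# Fit the modified model directly with rstan
fit_manual <- stan(model_code = scode, data = sdata,
                   chains = 4, cores = 4, seed = 2020+06+13)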