I am running a mixed effects model with brm. I am following the tips I have read here before about slowly building up, since my initial, more complicated model did not perform well at all. Here is the brm code I am using so far:

```
# Categorical (multinomial logit) mixed model: party predicted by income,
# with random intercepts for scode and gender; QR decomposition of the
# population-level design matrix for better sampler geometry.
test.m1 <- brm(
  bf(party ~ income + (1 | scode) + (1 | gender), decomp = "QR"),
  data = megapoll,
  family = categorical,
  prior = testp,
  # sampling setup
  chains = nCores,
  cores = nCores,
  iter = 2200,
  warmup = 1400,
  control = list(adapt_delta = 0.9, max_treedepth = 10)
)
```

And here is the corresponding output of stancode():

```
// Stan program generated by brms stancode() for a categorical (multinomial
// logit) model: party ~ income + (1|scode) + (1|gender) with decomp = "QR".
// The response has ncat = 3 categories; category 1 is the reference, so mu2
// and mu3 are the linear predictors (log-odds vs. category 1) for
// categories 2 and 3, and the reference predictor is pinned at 0 in the
// model block. Each of the two grouping factors contributes a random
// intercept to EACH non-reference predictor, hence four group-level terms.
// NOTE(review): by brms convention the ID numbering presumably maps
// ID 1 = scode/mu2, ID 2 = gender/mu2, ID 3 = scode/mu3, ID 4 = gender/mu3
// -- confirm against make_standata() output.
// (Uses the older pre-2.26 array syntax, e.g. `int Y[N]`.)
functions {
}
data {
int<lower=1> N; // total number of observations
int<lower=2> ncat; // number of categories
int Y[N]; // response variable (category index in 1..ncat)
int<lower=1> K_mu2; // number of population-level effects for mu2
matrix[N, K_mu2] X_mu2; // population-level design matrix for mu2
int<lower=1> K_mu3; // number of population-level effects for mu3
matrix[N, K_mu3] X_mu3; // population-level design matrix for mu3
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
int<lower=1> J_1[N]; // grouping indicator per observation
// group-level predictor values (all 1s for a plain random intercept)
vector[N] Z_1_mu2_1;
// data for group-level effects of ID 2
int<lower=1> N_2; // number of grouping levels
int<lower=1> M_2; // number of coefficients per level
int<lower=1> J_2[N]; // grouping indicator per observation
// group-level predictor values (all 1s for a plain random intercept)
vector[N] Z_2_mu2_1;
// data for group-level effects of ID 3
int<lower=1> N_3; // number of grouping levels
int<lower=1> M_3; // number of coefficients per level
int<lower=1> J_3[N]; // grouping indicator per observation
// group-level predictor values (all 1s for a plain random intercept)
vector[N] Z_3_mu3_1;
// data for group-level effects of ID 4
int<lower=1> N_4; // number of grouping levels
int<lower=1> M_4; // number of coefficients per level
int<lower=1> J_4[N]; // grouping indicator per observation
// group-level predictor values (all 1s for a plain random intercept)
vector[N] Z_4_mu3_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc_mu2 = K_mu2 - 1;
matrix[N, Kc_mu2] Xc_mu2; // centered version of X_mu2 without an intercept
vector[Kc_mu2] means_X_mu2; // column means of X_mu2 before centering
// matrices for QR decomposition of the centered design matrix
matrix[N, Kc_mu2] XQ_mu2;
matrix[Kc_mu2, Kc_mu2] XR_mu2;
matrix[Kc_mu2, Kc_mu2] XR_mu2_inv;
int Kc_mu3 = K_mu3 - 1;
matrix[N, Kc_mu3] Xc_mu3; // centered version of X_mu3 without an intercept
vector[Kc_mu3] means_X_mu3; // column means of X_mu3 before centering
// matrices for QR decomposition of the centered design matrix
matrix[N, Kc_mu3] XQ_mu3;
matrix[Kc_mu3, Kc_mu3] XR_mu3;
matrix[Kc_mu3, Kc_mu3] XR_mu3_inv;
// drop column 1 (the intercept) and center the remaining predictors
for (i in 2:K_mu2) {
means_X_mu2[i - 1] = mean(X_mu2[, i]);
Xc_mu2[, i - 1] = X_mu2[, i] - means_X_mu2[i - 1];
}
// compute and scale QR decomposition; the sqrt(N - 1) scaling puts the
// Q columns on roughly unit-standard-deviation scale
XQ_mu2 = qr_thin_Q(Xc_mu2) * sqrt(N - 1);
XR_mu2 = qr_thin_R(Xc_mu2) / sqrt(N - 1);
XR_mu2_inv = inverse(XR_mu2);
// drop column 1 (the intercept) and center the remaining predictors
for (i in 2:K_mu3) {
means_X_mu3[i - 1] = mean(X_mu3[, i]);
Xc_mu3[, i - 1] = X_mu3[, i] - means_X_mu3[i - 1];
}
// compute and scale QR decomposition (same scaling as for mu2)
XQ_mu3 = qr_thin_Q(Xc_mu3) * sqrt(N - 1);
XR_mu3 = qr_thin_R(Xc_mu3) / sqrt(N - 1);
XR_mu3_inv = inverse(XR_mu3);
}
parameters {
vector[Kc_mu2] bQ_mu2; // regression coefficients at QR scale
real Intercept_mu2; // temporary intercept for centered predictors
vector[Kc_mu3] bQ_mu3; // regression coefficients at QR scale
real Intercept_mu3; // temporary intercept for centered predictors
// the z_* vectors are standard-normal; actual group effects are built in
// transformed parameters as sd * z (non-centered parameterization)
vector<lower=0>[M_1] sd_1; // group-level standard deviations
vector[N_1] z_1[M_1]; // standardized group-level effects
vector<lower=0>[M_2] sd_2; // group-level standard deviations
vector[N_2] z_2[M_2]; // standardized group-level effects
vector<lower=0>[M_3] sd_3; // group-level standard deviations
vector[N_3] z_3[M_3]; // standardized group-level effects
vector<lower=0>[M_4] sd_4; // group-level standard deviations
vector[N_4] z_4[M_4]; // standardized group-level effects
}
transformed parameters {
vector[N_1] r_1_mu2_1; // actual group-level effects
vector[N_2] r_2_mu2_1; // actual group-level effects
vector[N_3] r_3_mu3_1; // actual group-level effects
vector[N_4] r_4_mu3_1; // actual group-level effects
real lprior = 0; // prior contributions to the log posterior
// non-centered parameterization: scale standard-normal draws by the sd
r_1_mu2_1 = (sd_1[1] * (z_1[1]));
r_2_mu2_1 = (sd_2[1] * (z_2[1]));
r_3_mu3_1 = (sd_3[1] * (z_3[1]));
r_4_mu3_1 = (sd_4[1] * (z_4[1]));
lprior += normal_lpdf(bQ_mu2 | 0,5);
lprior += student_t_lpdf(Intercept_mu2 | 3, 0, 2.5);
lprior += normal_lpdf(bQ_mu3 | 0,5);
lprior += student_t_lpdf(Intercept_mu3 | 3, 0, 2.5);
// half-Student-t priors on the sds; the lccdf term renormalizes for the
// truncation at zero
lprior += student_t_lpdf(sd_1 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
lprior += student_t_lpdf(sd_2 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
lprior += student_t_lpdf(sd_3 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
lprior += student_t_lpdf(sd_4 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu2 = rep_vector(0.0, N);
// initialize linear predictor term
vector[N] mu3 = rep_vector(0.0, N);
// linear predictor matrix
vector[ncat] mu[N];
// population-level part, on the QR scale
mu2 += Intercept_mu2 + XQ_mu2 * bQ_mu2;
mu3 += Intercept_mu3 + XQ_mu3 * bQ_mu3;
for (n in 1:N) {
// add group-level terms for mu2 (IDs 1 and 2)
mu2[n] += r_1_mu2_1[J_1[n]] * Z_1_mu2_1[n] + r_2_mu2_1[J_2[n]] * Z_2_mu2_1[n];
}
for (n in 1:N) {
// add group-level terms for mu3 (IDs 3 and 4)
mu3[n] += r_3_mu3_1[J_3[n]] * Z_3_mu3_1[n] + r_4_mu3_1[J_4[n]] * Z_4_mu3_1[n];
}
for (n in 1:N) {
// reference category gets a fixed 0 predictor; softmax is applied
// implicitly by categorical_logit below
mu[n] = transpose([0, mu2[n], mu3[n]]);
}
for (n in 1:N) {
target += categorical_logit_lpmf(Y[n] | mu[n]);
}
}
// priors including constants
target += lprior;
// standard-normal priors on the raw (non-centered) group effects
target += std_normal_lpdf(z_1[1]);
target += std_normal_lpdf(z_2[1]);
target += std_normal_lpdf(z_3[1]);
target += std_normal_lpdf(z_4[1]);
}
generated quantities {
// map QR-scale coefficients back to the original predictor scale
vector[Kc_mu2] b_mu2 = XR_mu2_inv * bQ_mu2;
// actual population-level intercept (undo the centering shift)
real b_mu2_Intercept = Intercept_mu2 - dot_product(means_X_mu2, b_mu2);
// map QR-scale coefficients back to the original predictor scale
vector[Kc_mu3] b_mu3 = XR_mu3_inv * bQ_mu3;
// actual population-level intercept (undo the centering shift)
real b_mu3_Intercept = Intercept_mu3 - dot_product(means_X_mu3, b_mu3);
}
```

The "gender" RE is causing my divergent transitions (see attached photo) at the mu2 and mu3 intercepts. I initially thought this would be an issue with the priors, but the pp_check seems to suggest that this is not the case.

Any advice on where to go? Thanks y'all, I appreciate y'all's time!