Hello,
I am trying to use the index approach for categorical predictors in a model, that should include factors and continuous variables, with a nested design.
Here is my code for generating the data and model using the indicator approach:
# Generation of data for example:
SuperFamily = sample(c("a","b","c"),10, replace=TRUE)
Family = sample(c("l","m","n","o","p"),10, replace=TRUE)
y = sample(c(1,0),10, replace=TRUE)
x11 = rnorm(10,1,0.3)
x22 = rnorm(10,1,0.2)
factor1 = sample(c("h","i","j","k"),10, replace=TRUE)
factor2 = sample(c("e","f","g"),10, replace=TRUE)
# Minmax normalization of continuous predictors:
x1 = (x11min(x11))/(max(x11)min(x11))
x2 = (x22min(x22))/(max(x22)min(x22))
# Combine the vectors in a dataframe:
dstan < as.data.frame(cbind(SuperFamily, Family, y, x1, x2, factor1, factor2))
# Classification of variables:
dstan$SuperFamily = as.factor(dstan$SuperFamily)
dstan$Family = as.factor(dstan$Family)
dstan$y = as.integer(dstan$y)
dstan$x1 = as.numeric(dstan$x1)
dstan$x2 = as.numeric(dstan$x2)
dstan$factor1 = as.factor(dstan$factor1)
dstan$factor2 = as.factor(dstan$factor2)
str(dstan)
library(brms)
model1 < brm(
y ~ x1 + x2 + factor1 + factor2 + (1  SuperFamily/Family),
data = dstan,
family = bernoulli(link = "logit"),
prior = c(
prior(normal(0.5, 0.25), "b"),
prior(cauchy(0, 1), class = "sd")
),
chains = 4, iter = 450, warmup = 150, cores = getOption("mc.cores", 4L), control = list(max_treedepth = 12, adapt_delta = 0.99), seed=1
)
I have tried to transform the above model for index approach in two ways, according to what I have found here, here and here:
Way 1: Changing global intercept to 0:
model1.2 < brm(
y ~ 0 + x1 + x2 + factor1 + factor2 + (1  SuperFamily/Family),
data = dstan,
family = bernoulli(link = "logit"),
prior = c(
prior(normal(0.5, 0.25), "b"),
prior(cauchy(0, 1), class = "sd")
),
chains = 4, iter = 450, warmup = 150, cores = getOption("mc.cores", 4L), control = list(max_treedepth = 12, adapt_delta = 0.99), seed=1
)
However, using â€ś0â€ť as a global intercept might not be an appropriate solution for a model with continuous variables and multiple categorical predictors. Some of the problems Iâ€™ve found were:
 The posterior estimates donâ€™t include a global intercept (e.g., for x=0, I would expect the intercept to equal something else than 0, as 0 is not the mean of y);
 Not all the levels of â€śfactor2â€ť appear among the posterior estimated parameters.
Way 2: Using bf() function with different intercepts for each variable:
model1.3 < brm(
formula = bf(
y~ a*x1 + b*x2 + c + d + e ,
a ~ 1 + x1,
b ~ 1 + x2,
c ~ 0 + factor1,
d ~ 0 + factor2,
e ~ 0 + (1SuperFamily/Family),
nl = TRUE),
data = dstan,
family = bernoulli(link = "logit"),
prior = prior(cauchy(0, 1), nlpar = "e", class="sd") +
prior(normal(0.5, 0.25), nlpar = "a") +
prior(normal(0.5, 0.25), nlpar = "b") +
prior(normal(0.5, 0.25), nlpar = "c") +
prior(normal(0.5, 0.25), nlpar = "d"),
chains = 4, iter = 450, warmup = 150, cores = getOption("mc.cores", 4L), control = list(max_treedepth = 12, adapt_delta = 0.99), seed=1
)
Additionally, Iâ€™m not sure that Iâ€™m using the right syntax in bf():

Matching an intercept for each continuous variable, instead of having a global intercept, feels particularly strange.

Also, is the formula transformed correctly from the original model (model1)?
I would be happy to hear your opinion on my solutions and will be thankful if you could help me correctly convert my model (i.e., â€śmodel1â€ť) to index approach in brms.
Many thanks in advance!