Here is an example of how complex predictor terms can easily get with brms:
set.seed(1234)
dat <- data.frame(
count = rpois(236, lambda = 20),
visit = rep(1:4, each = 59),
patient = factor(rep(1:59, 4)),
Age = rnorm(236),
Trt = factor(sample(0:1, 236, TRUE)),
AgeSD = abs(rnorm(236, 1)),
Exp = sample(1:5, 236, TRUE),
volume = rnorm(236),
gender = factor(c(rep("m", 30), rep("f", 29)))
)
make_stancode(
formula =
bf(count ~ Trt*Age*mo(Exp) + s(Age) +
offset(Age) + (1+Trt|visit),
sigma ~ Trt),
data = dat, family = student(),
prior = set_prior("normal(0,2)", class = "b") +
set_prior("cauchy(0,2)", class = "sd") +
set_prior("normal(0,3)", dpar = "sigma")
)
which results in the Stan code
// generated with brms 2.11.2
functions {
/* compute the logm1 link
* Args:
* p: a positive scalar
* Returns:
* a scalar in (-Inf, Inf)
*/
real logm1(real y) {
return log(y - 1);
}
/* compute the inverse of the logm1 link
* Args:
* y: a scalar in (-Inf, Inf)
* Returns:
* a positive scalar
*/
real expp1(real y) {
return exp(y) + 1;
}
/* compute monotonic effects
* Args:
* scale: a simplex parameter
* i: index to sum over the simplex
* Returns:
* a scalar between 0 and 1
*/
real mo(vector scale, int i) {
if (i == 0) {
return 0;
} else {
return rows(scale) * sum(scale[1:i]);
}
}
}
data {
int<lower=1> N; // number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> Ksp; // number of special effects terms
// covariates of special effects terms
vector[N] Csp_1;
vector[N] Csp_2;
vector[N] Csp_3;
int<lower=1> Imo; // number of monotonic variables
int<lower=1> Jmo[Imo]; // length of simplexes
// monotonic variables
int Xmo_1[N];
int Xmo_2[N];
int Xmo_3[N];
int Xmo_4[N];
// prior concentration of monotonic simplexes
vector[Jmo[1]] con_simo_1;
vector[Jmo[2]] con_simo_2;
vector[Jmo[3]] con_simo_3;
vector[Jmo[4]] con_simo_4;
// data for splines
int Ks; // number of linear effects
matrix[N, Ks] Xs; // design matrix for the linear effects
// data for spline s(Age)
int nb_1; // number of bases
int knots_1[nb_1]; // number of knots
// basis function matrices
matrix[N, knots_1[1]] Zs_1_1;
vector[N] offset;
int<lower=1> K_sigma; // number of population-level effects
matrix[N, K_sigma] X_sigma; // population-level design matrix
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
int<lower=1> J_1[N]; // grouping indicator per observation
// group-level predictor values
vector[N] Z_1_1;
vector[N] Z_1_2;
int<lower=1> NC_1; // number of group-level correlations
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
int Kc_sigma = K_sigma - 1;
matrix[N, Kc_sigma] Xc_sigma; // centered version of X_sigma without an intercept
vector[Kc_sigma] means_X_sigma; // column means of X_sigma before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
for (i in 2:K_sigma) {
means_X_sigma[i - 1] = mean(X_sigma[, i]);
Xc_sigma[, i - 1] = X_sigma[, i] - means_X_sigma[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
// temporary intercept for centered predictors
real Intercept;
// special effects coefficients
vector[Ksp] bsp;
// simplexes of monotonic effects
simplex[Jmo[1]] simo_1;
simplex[Jmo[2]] simo_2;
simplex[Jmo[3]] simo_3;
simplex[Jmo[4]] simo_4;
// spline coefficients
vector[Ks] bs;
// parameters for spline s(Age)
// standarized spline coefficients
vector[knots_1[1]] zs_1_1;
// standard deviations of the coefficients
real<lower=0> sds_1_1;
vector[Kc_sigma] b_sigma; // population-level effects
// temporary intercept for centered predictors
real Intercept_sigma;
real<lower=1> nu; // degrees of freedom or shape
vector<lower=0>[M_1] sd_1; // group-level standard deviations
matrix[M_1, N_1] z_1; // standardized group-level effects
// cholesky factor of correlation matrix
cholesky_factor_corr[M_1] L_1;
}
transformed parameters {
// actual spline coefficients
vector[knots_1[1]] s_1_1 = sds_1_1 * zs_1_1;
// actual group-level effects
matrix[N_1, M_1] r_1 = (diag_pre_multiply(sd_1, L_1) * z_1)';
// using vectors speeds up indexing in loops
vector[N_1] r_1_1 = r_1[, 1];
vector[N_1] r_1_2 = r_1[, 2];
}
model {
// initialize linear predictor term
vector[N] mu = Intercept + Xc * b + Xs * bs + Zs_1_1 * s_1_1 + offset;
// initialize linear predictor term
vector[N] sigma = Intercept_sigma + Xc_sigma * b_sigma;
for (n in 1:N) {
// add more terms to the linear predictor
mu[n] += (bsp[1]) * mo(simo_1, Xmo_1[n]) + (bsp[2]) * mo(simo_2, Xmo_2[n]) * Csp_1[n] + (bsp[3]) * mo(simo_3, Xmo_3[n]) * Csp_2[n] + (bsp[4]) * mo(simo_4, Xmo_4[n]) * Csp_3[n] + r_1_1[J_1[n]] * Z_1_1[n] + r_1_2[J_1[n]] * Z_1_2[n];
}
for (n in 1:N) {
// apply the inverse link function
sigma[n] = exp(sigma[n]);
}
// priors including all constants
target += normal_lpdf(b | 0,2);
target += student_t_lpdf(Intercept | 3, 20, 10);
target += normal_lpdf(bsp | 0,2);
target += dirichlet_lpdf(simo_1 | con_simo_1);
target += dirichlet_lpdf(simo_2 | con_simo_2);
target += dirichlet_lpdf(simo_3 | con_simo_3);
target += dirichlet_lpdf(simo_4 | con_simo_4);
target += normal_lpdf(bs | 0,2)
- 1 * normal_lccdf(0 | 0,2);
target += normal_lpdf(zs_1_1 | 0, 1);
target += student_t_lpdf(sds_1_1 | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
target += normal_lpdf(b_sigma | 0,3);
target += student_t_lpdf(Intercept_sigma | 3, 0, 10);
target += gamma_lpdf(nu | 2, 0.1)
- 1 * gamma_lccdf(1 | 2, 0.1);
target += cauchy_lpdf(sd_1 | 0,2)
- 2 * cauchy_lccdf(0 | 0,2);
target += normal_lpdf(to_vector(z_1) | 0, 1);
target += lkj_corr_cholesky_lpdf(L_1 | 1);
// likelihood including all constants
if (!prior_only) {
target += student_t_lpdf(Y | nu, mu, sigma);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// actual population-level intercept
real b_sigma_Intercept = Intercept_sigma - dot_product(means_X_sigma, b_sigma);
// group-level correlations
corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
vector<lower=-1,upper=1>[NC_1] cor_1;
// extract upper diagonal of correlation matrix
for (k in 1:M_1) {
for (j in 1:(k - 1)) {
cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
}
}
}
Trying to stuff all the necessary variables to compute mu and sigma inside the reduce function will be a lot of pain that I will likely not be able to go through for all options brms offers. Instead, I would think I will end up with computing distributional parameters, such as mu and sigma, outside of the reducer and only parallelize over the lpdf contribution.