Here is an example of how complex predictor terms can easily get with brms:
set.seed(1234)
dat <- data.frame(
  count = rpois(236, lambda = 20),
  visit = rep(1:4, each = 59),
  patient = factor(rep(1:59, 4)),
  Age = rnorm(236), 
  Trt = factor(sample(0:1, 236, TRUE)),
  AgeSD = abs(rnorm(236, 1)),
  Exp = sample(1:5, 236, TRUE),
  volume = rnorm(236),
  gender = factor(c(rep("m", 30), rep("f", 29)))  # length 59 (one per patient), recycled across the 236 rows
)
make_stancode(
  formula = 
    bf(count ~ Trt*Age*mo(Exp) + s(Age) +
       offset(Age) + (1+Trt|visit),
       sigma ~ Trt),
  data = dat, family = student(), 
  prior = set_prior("normal(0,2)", class = "b") +
    set_prior("cauchy(0,2)", class = "sd") +
    set_prior("normal(0,3)", dpar = "sigma")
)
which results in the following Stan code:
// generated with brms 2.11.2
functions {
  /* compute the logm1 link 
   * Args: 
   *   p: a positive scalar
   * Returns: 
   *   a scalar in (-Inf, Inf)
   */ 
   real logm1(real y) { 
     return log(y - 1);
   }
  /* compute the inverse of the logm1 link 
   * Args: 
   *   y: a scalar in (-Inf, Inf)
   * Returns: 
   *   a positive scalar
   */ 
   real expp1(real y) { 
     return exp(y) + 1;
   }
  /* compute monotonic effects
   * Args:
   *   scale: a simplex parameter
   *   i: index to sum over the simplex
   * Returns:
   *   a scalar between 0 and rows(scale)
   */
  real mo(vector scale, int i) {
    if (i == 0) {
      return 0;
    } else {
      return rows(scale) * sum(scale[1:i]);
    }
  }
}
data {
  int<lower=1> N;  // number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> Ksp;  // number of special effects terms
  // covariates of special effects terms
  vector[N] Csp_1;
  vector[N] Csp_2;
  vector[N] Csp_3;
  int<lower=1> Imo;  // number of monotonic variables
  int<lower=1> Jmo[Imo];  // length of simplexes
  // monotonic variables
  int Xmo_1[N];
  int Xmo_2[N];
  int Xmo_3[N];
  int Xmo_4[N];
  // prior concentration of monotonic simplexes
  vector[Jmo[1]] con_simo_1;
  vector[Jmo[2]] con_simo_2;
  vector[Jmo[3]] con_simo_3;
  vector[Jmo[4]] con_simo_4;
  // data for splines
  int Ks;  // number of linear effects
  matrix[N, Ks] Xs;  // design matrix for the linear effects
  // data for spline s(Age)
  int nb_1;  // number of bases
  int knots_1[nb_1];  // number of knots
  // basis function matrices
  matrix[N, knots_1[1]] Zs_1_1;
  vector[N] offset;
  int<lower=1> K_sigma;  // number of population-level effects
  matrix[N, K_sigma] X_sigma;  // population-level design matrix
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  int<lower=1> J_1[N];  // grouping indicator per observation
  // group-level predictor values
  vector[N] Z_1_1;
  vector[N] Z_1_2;
  int<lower=1> NC_1;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc = K - 1;
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  int Kc_sigma = K_sigma - 1;
  matrix[N, Kc_sigma] Xc_sigma;  // centered version of X_sigma without an intercept
  vector[Kc_sigma] means_X_sigma;  // column means of X_sigma before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
  for (i in 2:K_sigma) {
    means_X_sigma[i - 1] = mean(X_sigma[, i]);
    Xc_sigma[, i - 1] = X_sigma[, i] - means_X_sigma[i - 1];
  }
}
parameters {
  vector[Kc] b;  // population-level effects
  // temporary intercept for centered predictors
  real Intercept;
  // special effects coefficients
  vector[Ksp] bsp;
  // simplexes of monotonic effects
  simplex[Jmo[1]] simo_1;
  simplex[Jmo[2]] simo_2;
  simplex[Jmo[3]] simo_3;
  simplex[Jmo[4]] simo_4;
  // spline coefficients
  vector[Ks] bs;
  // parameters for spline s(Age)
  // standardized spline coefficients
  vector[knots_1[1]] zs_1_1;
  // standard deviations of the coefficients
  real<lower=0> sds_1_1;
  vector[Kc_sigma] b_sigma;  // population-level effects
  // temporary intercept for centered predictors
  real Intercept_sigma;
  real<lower=1> nu;  // degrees of freedom or shape
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  matrix[M_1, N_1] z_1;  // standardized group-level effects
  // cholesky factor of correlation matrix
  cholesky_factor_corr[M_1] L_1;
}
transformed parameters {
  // actual spline coefficients
  vector[knots_1[1]] s_1_1 = sds_1_1 * zs_1_1;
  // actual group-level effects
  matrix[N_1, M_1] r_1 = (diag_pre_multiply(sd_1, L_1) * z_1)';
  // using vectors speeds up indexing in loops
  vector[N_1] r_1_1 = r_1[, 1];
  vector[N_1] r_1_2 = r_1[, 2];
}
model {
  // initialize linear predictor term
  vector[N] mu = Intercept + Xc * b + Xs * bs + Zs_1_1 * s_1_1 + offset;
  // initialize linear predictor term
  vector[N] sigma = Intercept_sigma + Xc_sigma * b_sigma;
  for (n in 1:N) {
    // add more terms to the linear predictor
    mu[n] += (bsp[1]) * mo(simo_1, Xmo_1[n]) +
             (bsp[2]) * mo(simo_2, Xmo_2[n]) * Csp_1[n] +
             (bsp[3]) * mo(simo_3, Xmo_3[n]) * Csp_2[n] +
             (bsp[4]) * mo(simo_4, Xmo_4[n]) * Csp_3[n] +
             r_1_1[J_1[n]] * Z_1_1[n] + r_1_2[J_1[n]] * Z_1_2[n];
  }
  for (n in 1:N) {
    // apply the inverse link function
    sigma[n] = exp(sigma[n]);
  }
  // priors including all constants
  target += normal_lpdf(b | 0,2);
  target += student_t_lpdf(Intercept | 3, 20, 10);
  target += normal_lpdf(bsp | 0,2);
  target += dirichlet_lpdf(simo_1 | con_simo_1);
  target += dirichlet_lpdf(simo_2 | con_simo_2);
  target += dirichlet_lpdf(simo_3 | con_simo_3);
  target += dirichlet_lpdf(simo_4 | con_simo_4);
  target += normal_lpdf(bs | 0,2)
    - 1 * normal_lccdf(0 | 0,2);
  target += normal_lpdf(zs_1_1 | 0, 1);
  target += student_t_lpdf(sds_1_1 | 3, 0, 10)
    - 1 * student_t_lccdf(0 | 3, 0, 10);
  target += normal_lpdf(b_sigma | 0,3);
  target += student_t_lpdf(Intercept_sigma | 3, 0, 10);
  target += gamma_lpdf(nu | 2, 0.1)
    - 1 * gamma_lccdf(1 | 2, 0.1);
  target += cauchy_lpdf(sd_1 | 0,2)
    - 2 * cauchy_lccdf(0 | 0,2);
  target += normal_lpdf(to_vector(z_1) | 0, 1);
  target += lkj_corr_cholesky_lpdf(L_1 | 1);
  // likelihood including all constants
  if (!prior_only) {
    target += student_t_lpdf(Y | nu, mu, sigma);
  }
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
  // actual population-level intercept
  real b_sigma_Intercept = Intercept_sigma - dot_product(means_X_sigma, b_sigma);
  // group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
}
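As an aside, the same arguments can be passed to brm() instead of make_stancode() to actually compile and fit this model; make_stancode() only returns the program shown above. A minimal sketch (the priors are omitted here, so brms would fall back to its defaults):
# fit the model instead of only generating its Stan code
fit <- brm(
  bf(count ~ Trt*Age*mo(Exp) + s(Age) + offset(Age) + (1+Trt|visit),
     sigma ~ Trt),
  data = dat, family = student()
)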
Trying to stuff all the variables needed to compute mu and sigma into the reduce function would be a lot of pain, and I will likely not be able to pull it off for all the options brms offers. Instead, I expect to end up computing distributional parameters, such as mu and sigma, outside of the reducer and only parallelizing over the lpdf contributions.
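To make that concrete, here is a minimal sketch of what such a likelihood-only reducer could look like with Stan's reduce_sum (available from Stan 2.23 on). This is not brms output; the names partial_log_lik and grainsize are hypothetical, and grainsize would have to be declared somewhere, e.g., as data:
functions {
  // hypothetical reducer: only the response is sliced; mu and sigma
  // arrive precomputed, so the reducer is independent of how complex
  // the predictor terms were that produced them
  real partial_log_lik(real[] y_slice, int start, int end,
                       real nu, vector mu, vector sigma) {
    return student_t_lpdf(y_slice | nu, mu[start:end], sigma[start:end]);
  }
}
model {
  // ... compute mu and sigma in full, exactly as in the code above ...
  if (!prior_only) {
    target += reduce_sum(partial_log_lik, to_array_1d(Y), grainsize,
                         nu, mu, sigma);
  }
}
Since mu and sigma are computed once in the model block, the reducer only needs the sliced response plus three shared arguments, rather than every design matrix, index variable, and parameter that brms would otherwise have to thread through it.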