Centered parametrization using stan brms code for varying slopes

I am trying to run a varying-slope model by adapting the brms code generated using:

sm = fread("example.csv")
make_stancode(smx ~ time + period + (time + period | fips), data =  sm)

I attached the data I am using example.csv (45.2 KB)

I adapted the stan code using the hint suggested by @martinmodrak here the post:

One can manually edit the generated brms code and use non-centered for all parameters, but that doesn’t help. One could also add a sum-to-zero constrain on the varying intercepts to remove one degree of freedom (I think R-INLA does this by default), this also does not help (at least a simple implementation). However doing both of those things helps

I am not able to make it work. I commented out the original brms code I didn’t use and flagged the code added manually.

// generated with brms 2.16.1
functions {
 /* compute correlated group-level effects
  * Args: 
  *   z: matrix of unscaled group-level effects
  *   SD: vector of standard deviation parameters
  *   L: cholesky factor correlation matrix
  * Returns: 
  *   matrix of scaled group-level effects
  */ 
  matrix scale_r_cor(matrix z, vector SD, matrix L) {
    // r is stored in another dimension order than z
    return transpose(diag_pre_multiply(SD, L) * z);
  }
}
data {
  int<lower=1> N;  // total number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  int<lower=1> J_1[N];  // grouping indicator per observation
  // group-level predictor values
  vector[N] Z_1_1;
  vector[N] Z_1_2;
  vector[N] Z_1_3;
  int<lower=1> NC_1;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc = K - 1;
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
}
parameters {
  vector[Kc] b;  // population-level effects
  real Intercept;  // temporary intercept for centered predictors
  real<lower=0> sigma;  // dispersion parameter
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  cholesky_factor_corr[M_1] L_1;  // cholesky factor of correlation matrix

  // manually added
  matrix[N_1, M_1] r_1;
  vector[N_1] r_1_1;
  vector[N_1] r_1_2;
  vector[N_1] r_1_3;
  // original brms
  // matrix[M_1, N_1] z_1;  // standardized group-level effects
  

}
// original brms
// transformed parameters {
//   matrix[N_1, M_1] r_1;  // actual group-level effects
//   // using vectors speeds up indexing in loops
//   vector[N_1] r_1_1;
//   vector[N_1] r_1_2;
//   vector[N_1] r_1_3;
//   // compute actual group-level effects
//   r_1 = scale_r_cor(z_1, sd_1, L_1);
//   r_1_1 = r_1[, 1];
//   r_1_2 = r_1[, 2];
//   r_1_3 = r_1[, 3];
// }
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] mu = Intercept + rep_vector(0.0, N);
    for (n in 1:N) {
      // add more terms to the linear predictor
      mu[n] += r_1_1[J_1[n]] * Z_1_1[n] + r_1_2[J_1[n]] * Z_1_2[n] + r_1_3[J_1[n]] * Z_1_3[n];
    }
    target += normal_id_glm_lpdf(Y | Xc, mu, b, sigma);
  }
  // priors including constants
  target += student_t_lpdf(Intercept | 3, 0, 2.5);
  target += student_t_lpdf(sigma | 3, 0, 2.5)
    - 1 * student_t_lccdf(0 | 3, 0, 2.5);
  target += student_t_lpdf(sd_1 | 3, 0, 2.5)
    - 3 * student_t_lccdf(0 | 3, 0, 2.5);
  target += lkj_corr_cholesky_lpdf(L_1 | 1);

  // manually added
  target += normal_lpdf(to_vector(r_1) | 0, sd_1[1]);
  // manual soft sum to zero constraint
  target += normal_lpdf(sum(to_vector(r_1)) | 0, 0.0001 * N_1);
  
  // orginal brms
  // target += std_normal_lpdf(to_vector(z_1));

}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
  // compute group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
}

I am not able to generate the correlation matrix. I am using this function to create a manual brms object:

manual_brms = function(formula, path_stancode, data, chains = 1, iter_warmup = 1000, iter_sampling = 1000, threads = 1) {
    m_centered = cmdstanr::cmdstan_model(path_stancode, cpp_options = list(stan_threads = TRUE))
    sdat = brms::make_standata(formula, data = data) 
    fit_centered = m_centered$sample(data = sdat,
        chains = chains,
        iter_warmup = iter_warmup,
        iter_sampling = iter_sampling,
        threads_per_chain = threads)
    bm_manual = brm(formula, data = data, empty = TRUE) 
    bm_manual$fit <- rstan::read_stan_csv(fit_centered$output_files())
    bm_manual <- brms::rename_pars(bm_manual)
    return(bm_manual)
}

Then I run:

f = formula(smx ~ time + period + (time + period | fips))
output = manual_brms(f, "src/testing.stan", sm)
summary(output)

> summary(output)
Error: The following variables are missing in the draws object: {'Cor_1[1,1]', 'Cor_1[2,1]', 'Cor_1[3,1]', 'Cor_1[1,2]', 'Cor_1[2,2]', 'Cor_1[3,2]', 'Cor_1[1,3]', 'Cor_1[2,3]', 'Cor_1[3,3]'}

Thank you for any suggestions and comments,
Sebastian

1 Like

Hi,
yes, the case for correlated predictors is a bit more tricky.
I think you need something like r_1 ~ multinormal_cholesky(diag_pre_multiply(sd_1, L_1) in your model block (the r_1_X are then just slices of that matrix to be built in the transformed parameters block). This should follow from Multivariate normal distribution - Wikipedia, but please check my reasoning, I am not very strong in linear algebra.

Not sure about the “missing parameters” error - the code you posted seems to include the Cor_1 parameter in generated quantities, are you sure the code you shared is the one you ran the model with?

Best of luck with your model!

Maybe check out the example here, which is raw Stan but has a data variable you can use to toggle on/off monolithic centered/non-centered parameterization.

The R code uses the probably-to-be-abandoned aria package, but it should be straightforward to use cmdstanr instead.