Hi,
I know that the non-centered parameterization is generally preferred over the centered parameterization, but in some cases the centered parameterization is advised:
“when there is a lot of data, the centered parameterization is more efficient” https://mc-stan.org/docs/2_18/stan-users-guide/reparameterization-section.html
Since brms generates Stan code with the non-centered parameterization, could someone please guide me on how best to change the brms-generated Stan code from the non-centered to the centered parameterization? (My rough attempt is at the end of this post.)
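For reference, this is my understanding of the difference between the two parameterizations for a toy varying-intercept model (an illustrative sketch I wrote myself, not brms output; all names such as J, group, y, alpha, mu_alpha, sigma_alpha and sigma_y are made up):

// non-centered: sample standardized effects z and rescale them
data {
  int<lower=1> N;
  int<lower=1> J;
  array[N] int<lower=1, upper=J> group;
  vector[N] y;
}
parameters {
  real mu_alpha;
  real<lower=0> sigma_alpha;
  real<lower=0> sigma_y;
  vector[J] z; // standardized group effects
}
transformed parameters {
  vector[J] alpha = mu_alpha + sigma_alpha * z; // actual group effects
}
model {
  z ~ std_normal(); // prior on the standardized scale
  y ~ normal(alpha[group], sigma_y);
  mu_alpha ~ normal(0, 10);
  sigma_alpha ~ normal(0, 5);
  sigma_y ~ normal(0, 5);
}

// centered: sample the group effects alpha directly
data {
  int<lower=1> N;
  int<lower=1> J;
  array[N] int<lower=1, upper=J> group;
  vector[N] y;
}
parameters {
  real mu_alpha;
  real<lower=0> sigma_alpha;
  real<lower=0> sigma_y;
  vector[J] alpha; // actual group effects
}
model {
  alpha ~ normal(mu_alpha, sigma_alpha); // hierarchical prior placed directly on alpha
  y ~ normal(alpha[group], sigma_y);
  mu_alpha ~ normal(0, 10);
  sigma_alpha ~ normal(0, 5);
  sigma_y ~ normal(0, 5);
}

What I am less sure about is how to make the same change when there are several correlated group-level effects, as in the model below.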
Below is the Stan code generated for a non-linear model with three correlated group-level (random) effects.
library(brms)
# the model below uses the 'loss' data set (cumulative insurance loss payments
# by accident year AY and development period dev), as in the brms non-linear
# modelling vignette; it is assumed to be loaded already
fit_loss <- brm(
  bf(cum ~ ult * (1 - exp(-(dev/theta)^omega)),
     ult ~ 1 + (1|i|AY),
     omega ~ 1 + (1|i|AY),
     theta ~ 1 + (1|i|AY),
     nl = TRUE),
  data = loss, family = gaussian(),
  prior = c(
    prior(normal(5000, 1000), nlpar = "ult"),
    prior(normal(1, 2), nlpar = "omega"),
    prior(normal(45, 10), nlpar = "theta")
  ),
  control = list(adapt_delta = 0.9)
)
# scode <- stancode(fit_loss)
scode <-
"
// generated with brms 2.20.3
functions {
  /* compute correlated group-level effects
   * Args:
   *   z: matrix of unscaled group-level effects
   *   SD: vector of standard deviation parameters
   *   L: cholesky factor correlation matrix
   * Returns:
   *   matrix of scaled group-level effects
   */
  matrix scale_r_cor(matrix z, vector SD, matrix L) {
    // r is stored in another dimension order than z
    return transpose(diag_pre_multiply(SD, L) * z);
  }
}
data {
  int<lower=1> N; // total number of observations
  vector[N] Y; // response variable
  int<lower=1> K_ult; // number of population-level effects
  matrix[N, K_ult] X_ult; // population-level design matrix
  int<lower=1> K_omega; // number of population-level effects
  matrix[N, K_omega] X_omega; // population-level design matrix
  int<lower=1> K_theta; // number of population-level effects
  matrix[N, K_theta] X_theta; // population-level design matrix
  // covariates for non-linear functions
  array[N] int C_1;
  // data for group-level effects of ID 1
  int<lower=1> N_1; // number of grouping levels
  int<lower=1> M_1; // number of coefficients per level
  array[N] int<lower=1> J_1; // grouping indicator per observation
  // group-level predictor values
  vector[N] Z_1_ult_1;
  vector[N] Z_1_omega_2;
  vector[N] Z_1_theta_3;
  int<lower=1> NC_1; // number of group-level correlations
  int prior_only; // should the likelihood be ignored?
}
transformed data {
}
parameters {
  vector[K_ult] b_ult; // regression coefficients
  vector[K_omega] b_omega; // regression coefficients
  vector[K_theta] b_theta; // regression coefficients
  real<lower=0> sigma; // dispersion parameter
  vector<lower=0>[M_1] sd_1; // group-level standard deviations
  matrix[M_1, N_1] z_1; // standardized group-level effects
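  // (as far as I can tell, this z_1, together with the scale_r_cor() call in
  // transformed parameters and the std_normal prior in the model block, is the
  // non-centered machinery that would need to change)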
  cholesky_factor_corr[M_1] L_1; // cholesky factor of correlation matrix
}
transformed parameters {
  matrix[N_1, M_1] r_1; // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_1] r_1_ult_1;
  vector[N_1] r_1_omega_2;
  vector[N_1] r_1_theta_3;
  real lprior = 0; // prior contributions to the log posterior
  // compute actual group-level effects
  r_1 = scale_r_cor(z_1, sd_1, L_1);
  r_1_ult_1 = r_1[, 1];
  r_1_omega_2 = r_1[, 2];
  r_1_theta_3 = r_1[, 3];
  lprior += normal_lpdf(b_ult | 5000, 1000);
  lprior += normal_lpdf(b_omega | 1, 2);
  lprior += normal_lpdf(b_theta | 45, 10);
  lprior += student_t_lpdf(sigma | 3, 0, 1963.7)
    - 1 * student_t_lccdf(0 | 3, 0, 1963.7);
  lprior += student_t_lpdf(sd_1 | 3, 0, 1963.7)
    - 3 * student_t_lccdf(0 | 3, 0, 1963.7);
  lprior += lkj_corr_cholesky_lpdf(L_1 | 1);
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] nlp_ult = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] nlp_omega = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] nlp_theta = rep_vector(0.0, N);
    // initialize non-linear predictor term
    vector[N] mu;
    nlp_ult += X_ult * b_ult;
    nlp_omega += X_omega * b_omega;
    nlp_theta += X_theta * b_theta;
    for (n in 1:N) {
      // add more terms to the linear predictor
      nlp_ult[n] += r_1_ult_1[J_1[n]] * Z_1_ult_1[n];
    }
    for (n in 1:N) {
      // add more terms to the linear predictor
      nlp_omega[n] += r_1_omega_2[J_1[n]] * Z_1_omega_2[n];
    }
    for (n in 1:N) {
      // add more terms to the linear predictor
      nlp_theta[n] += r_1_theta_3[J_1[n]] * Z_1_theta_3[n];
    }
    for (n in 1:N) {
      // compute non-linear predictor values
      mu[n] = (nlp_ult[n] * (1 - exp( - (C_1[n] / nlp_theta[n]) ^ nlp_omega[n])));
    }
    target += normal_lpdf(Y | mu, sigma);
  }
  // priors including constants
  target += lprior;
  target += std_normal_lpdf(to_vector(z_1));
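  // (the standard normal prior on z_1 above is, I believe, the other piece of
  // the non-centered parameterization)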
}
generated quantities {
  // compute group-level correlations
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // extract upper diagonal of correlation matrix
  for (k in 1:M_1) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
}
"
Finally, here is my rough attempt at the centered parameterization for this model, in case it is a useful starting point; corrections are very welcome.
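As far as I can tell, three pieces implement the non-centered parameterization: the z_1 declaration in parameters, the r_1 = scale_r_cor(z_1, sd_1, L_1) line in transformed parameters, and target += std_normal_lpdf(to_vector(z_1)) in model. Below is a minimal sketch of how I think those blocks would look in a centered version, with r_1 declared directly as a parameter and given a multi_normal_cholesky prior (the local variable L_Sigma is just a name I made up, the lprior lines are unchanged copies, and the functions, data, likelihood loop and generated quantities blocks would stay exactly as generated above):

parameters {
  vector[K_ult] b_ult; // regression coefficients
  vector[K_omega] b_omega; // regression coefficients
  vector[K_theta] b_theta; // regression coefficients
  real<lower=0> sigma; // dispersion parameter
  vector<lower=0>[M_1] sd_1; // group-level standard deviations
  matrix[N_1, M_1] r_1; // actual group-level effects, sampled directly (replaces z_1)
  cholesky_factor_corr[M_1] L_1; // cholesky factor of correlation matrix
}
transformed parameters {
  // r_1 is now a parameter, so scale_r_cor() is no longer needed
  vector[N_1] r_1_ult_1 = r_1[, 1];
  vector[N_1] r_1_omega_2 = r_1[, 2];
  vector[N_1] r_1_theta_3 = r_1[, 3];
  real lprior = 0; // prior contributions to the log posterior
  lprior += normal_lpdf(b_ult | 5000, 1000);
  lprior += normal_lpdf(b_omega | 1, 2);
  lprior += normal_lpdf(b_theta | 45, 10);
  lprior += student_t_lpdf(sigma | 3, 0, 1963.7)
    - 1 * student_t_lccdf(0 | 3, 0, 1963.7);
  lprior += student_t_lpdf(sd_1 | 3, 0, 1963.7)
    - 3 * student_t_lccdf(0 | 3, 0, 1963.7);
  lprior += lkj_corr_cholesky_lpdf(L_1 | 1);
}
model {
  // ... likelihood block exactly as in the generated code above ...
  // priors including constants
  target += lprior;
  // centered prior on the group-level effects, replacing
  // target += std_normal_lpdf(to_vector(z_1));
  {
    matrix[M_1, M_1] L_Sigma = diag_pre_multiply(sd_1, L_1);
    for (j in 1:N_1) {
      target += multi_normal_cholesky_lpdf(to_vector(r_1[j]) | rep_vector(0.0, M_1), L_Sigma);
    }
  }
}

To actually run the edited model with the data brms prepared for the original fit, my plan was roughly the following (scode_centered standing for the full edited program pasted together from the code above):

sdata <- standata(fit_loss) # data list brms generated for fit_loss
fit_centered <- rstan::stan(
  model_code = scode_centered,
  data = sdata,
  control = list(adapt_delta = 0.9)
)

Does that look reasonable? Thank you!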