I was comparing a brms model with one that I coded directly in stan.
# brms model
f <- bm("Y_ls ~ 1 + Vars + (1|Subject)"
pr <- get_prior(formula = f, data = d)
pr$prior[1] <- paste0("normal(0,0.5)")
pr[pr$class == "Intercept", "prior" ] <- paste0("normal(0,1)")
pr[pr$class == "sigma" , "prior" ] <- paste0("exponential(1)")
m <- brm(data = d,
family = student,
formula = f,
prior = c(pr,
prior(gamma(15, 1.5), class = nu)),
warmup = 1500,
iter = 4000,
chains = 4,
cores = 4,
control = list(adapt_delta = 0.99) )
My stan code which, I assumed, should perform the same as the brms model.
data {
int<lower=1> N;
int<lower=1> N_Subject;
int<lower=1> K;
matrix[N, K] X;
matrix[N, N_Subject] X_Subject;
real Y_ls[N];
}
parameters {
// vector[N] mu;
real<lower=0> alpha;
vector[K] beta;
vector[N_animal] beta_Subject;
real<lower=0> sigma;
real<lower=0> sigma_Subject;
real<lower=0> nu;
}
model {
vector[N] mu;
//priors
sigma ~ exponential(1);
sigma_Subject ~ student_t(3,0,2.5);
alpha ~ normal(0,1);
beta ~ normal(0,0.5);
beta_animal ~ normal(0,0.5);
nu ~ gamma(15, 1.5);
// model
mu = alpha + X * beta + X_Subject * beta_Subject * sigma_Subject;
Y_ls ~ student_t(nu, mu, sigma);
}
Both models return exactly the same posterior values (safe some sampling variation) for the predictor variables. However the distributions for the group effects are completely different, although the medians are in the same directions. For clarity the figure shows a selection for the different posterior distributions for subjects between the two methods.
Unfortunately I don’t understand very well the returned stan code of the brms model so I guess there is a transformation that I am missing. Could someone explain the difference in distributions to me?
Personally I think the rstan method with the near Gaussian distros is more likely but I would like to have an expert opinion on this matter.
Thanks!
PS.
I added the stan code from the brms model as well.
// generated with brms 2.16.3
functions {
/* compute the logm1 link
* Args:
* p: a positive scalar
* Returns:
* a scalar in (-Inf, Inf)
*/
real logm1(real y) {
return log(y - 1);
}
/* compute the inverse of the logm1 link
* Args:
* y: a scalar in (-Inf, Inf)
* Returns:
* a positive scalar
*/
real expp1(real y) {
return exp(y) + 1;
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
int<lower=1> J_1[N]; // grouping indicator per observation
// group-level predictor values
vector[N] Z_1_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
real Intercept; // temporary intercept for centered predictors
real<lower=0> sigma; // dispersion parameter
real<lower=1> nu; // degrees of freedom or shape
vector<lower=0>[M_1] sd_1; // group-level standard deviations
vector[N_1] z_1[M_1]; // standardized group-level effects
}
transformed parameters {
vector[N_1] r_1_1; // actual group-level effects
r_1_1 = (sd_1[1] * (z_1[1]));
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = Intercept + Xc * b;
for (n in 1:N) {
// add more terms to the linear predictor
mu[n] += r_1_1[J_1[n]] * Z_1_1[n];
}
target += student_t_lpdf(Y | nu, mu, sigma);
}
// priors including constants
target += normal_lpdf(b | 0,0.5);
target += normal_lpdf(Intercept | 0,1);
target += exponential_lpdf(sigma | 1);
target += gamma_lpdf(nu | 15, 1.5)
- 1 * gamma_lccdf(1 | 15, 1.5);
target += student_t_lpdf(sd_1 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
target += std_normal_lpdf(z_1[1]);
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
}