If I define the following data and model:
library(brms)
library(rethinking)
## Sample the predictors
X <- cbind(1, rmvnorm2(100, Mu = rep(0,100), sigma = rep(1, 100), Rho = rlkjcorr(1, 100, 1)))
## Sample the parameters
beta <- rnorm(101)
## Compute the linear predictor
mu <- X %*% beta
## Sample the observed values
y <- rnorm(100, mu, sd = 0.5)
## Store everything in a data.frame
Dat <- data.frame(y = y)
Dat$X <- X
## Define the formula
formul <- bf(y ~ X)
## Define the priors
priors <- prior(horseshoe(df = 1, par_ratio = 0.5), class= b)
## DIsplay stan code
make_stancode(formul, data = Dat, prior = priors)
I obtain the following stan code, with the prior being scaled by the residual standard-error.
// generated with brms 2.8.8
functions {
/* Efficient computation of the horseshoe prior
* Args:
* zb: standardized population-level coefficients
* global: global horseshoe parameters
* local: local horseshoe parameters
* scale_global: global scale of the horseshoe prior
* c2: positive real number for regularization
* Returns:
* population-level coefficients following the horseshoe prior
*/
vector horseshoe(vector zb, vector[] local, real[] global,
real scale_global, real c2) {
int K = rows(zb);
vector[K] lambda = local[1] .* sqrt(local[2]);
vector[K] lambda2 = square(lambda);
real tau = global[1] * sqrt(global[2]) * scale_global;
vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
return zb .* lambda_tilde * tau;
}
}
data {
int<lower=1> N; // number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
real<lower=0> hs_df;
real<lower=0> hs_df_global;
real<lower=0> hs_df_slab;
real<lower=0> hs_scale_global;
real<lower=0> hs_scale_slab;
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
// local parameters for horseshoe prior
vector[Kc] zb;
vector<lower=0>[Kc] hs_local[2];
real temp_Intercept; // temporary intercept
// horseshoe shrinkage parameters
real<lower=0> hs_global[2];
real<lower=0> hs_c2;
real<lower=0> sigma; // residual SD
}
transformed parameters {
vector[Kc] b; // population-level effects
b = horseshoe(zb, hs_local, hs_global, hs_scale_global * sigma, hs_scale_slab^2 * hs_c2);
}
model {
vector[N] mu = temp_Intercept + Xc * b;
// priors including all constants
target += normal_lpdf(zb | 0, 1);
target += normal_lpdf(hs_local[1] | 0, 1)
- 101 * log(0.5);
target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
target += student_t_lpdf(temp_Intercept | 3, 3, 10);
target += normal_lpdf(hs_global[1] | 0, 1)
- 1 * log(0.5);
target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
target += student_t_lpdf(sigma | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
// likelihood including all constants
if (!prior_only) {
target += normal_lpdf(Y | mu, sigma);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = temp_Intercept - dot_product(means_X, b);
}
If I remove the autoscaling
## Define the priors
priors <- prior(horseshoe(df = 1, par_ratio = 0.5, autoscale = F), class= b)
## DIsplay stan code
make_stancode(formul, data = Dat, prior = priors)
I obtain this code
// generated with brms 2.8.8
functions {
/* Efficient computation of the horseshoe prior
* Args:
* zb: standardized population-level coefficients
* global: global horseshoe parameters
* local: local horseshoe parameters
* scale_global: global scale of the horseshoe prior
* c2: positive real number for regularization
* Returns:
* population-level coefficients following the horseshoe prior
*/
vector horseshoe(vector zb, vector[] local, real[] global,
real scale_global, real c2) {
int K = rows(zb);
vector[K] lambda = local[1] .* sqrt(local[2]);
vector[K] lambda2 = square(lambda);
real tau = global[1] * sqrt(global[2]) * scale_global;
vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
return zb .* lambda_tilde * tau;
}
}
data {
int<lower=1> N; // number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
real<lower=0> hs_df;
real<lower=0> hs_df_global;
real<lower=0> hs_df_slab;
real<lower=0> hs_scale_global;
real<lower=0> hs_scale_slab;
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
// local parameters for horseshoe prior
vector[Kc] zb;
vector<lower=0>[Kc] hs_local[2];
real temp_Intercept; // temporary intercept
// horseshoe shrinkage parameters
real<lower=0> hs_global[2];
real<lower=0> hs_c2;
real<lower=0> sigma; // residual SD
}
transformed parameters {
vector[Kc] b; // population-level effects
b = horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2);
}
model {
vector[N] mu = temp_Intercept + Xc * b;
// priors including all constants
target += normal_lpdf(zb | 0, 1);
target += normal_lpdf(hs_local[1] | 0, 1)
- 101 * log(0.5);
target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
target += student_t_lpdf(temp_Intercept | 3, 3, 10);
target += normal_lpdf(hs_global[1] | 0, 1)
- 1 * log(0.5);
target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
target += student_t_lpdf(sigma | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
// likelihood including all constants
if (!prior_only) {
target += normal_lpdf(Y | mu, sigma);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = temp_Intercept - dot_product(means_X, b);
}
Cheers,
Lucas