It might help to look at the Stan data and the Stan code that brms calls. For example,
library(rstan)
library(brms)
x <- rnorm(10,0,1)
y <- 3 + 0.5*x + 0.75*x^2 + rnorm(10,0,0.1)
d <- cbind.data.frame(y,x)
standata <- make_standata(y ~ 0 + Intercept + s(x), data=d)
Results in:
$N
[1] 10
$Y
[1] 3.099119 4.972620 3.170411 4.385173 3.344585 3.239739 8.127253 4.172452 6.656621 3.038228
$K
[1] 1
$X
Intercept
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
10 1
$nb_1
[1] 1
$knots_1
Zs_1_1
8
$Zs_1_1
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 0.06730124 -0.0015537841 -0.0016525991 -0.019986184 0.018824992 0.065588801 -0.034061338 -0.085315463
[2,] -0.05982110 0.0009866849 0.0002663504 0.007964710 -0.002670065 -0.039284372 0.059863813 0.052302420
[3,] 0.02898126 -0.0010104747 -0.0013267389 -0.008523016 0.018555381 0.041095329 -0.019466633 -0.091204959
[4,] -0.06631327 0.0006617158 0.0037670562 0.008914225 -0.007631189 -0.012008662 0.047046244 0.012245111
[5,] -0.01211092 -0.0017940785 -0.0001682632 -0.011147141 0.005869326 0.019599207 -0.005869238 -0.079664684
[6,] -0.01851463 0.0005611443 -0.0006554222 -0.011200458 0.003248949 0.017045865 -0.002989663 -0.075842927
[7,] 0.01509833 0.0026985738 0.0027100126 0.030448855 -0.025349155 -0.090203827 -0.012482367 0.175241206
[8,] -0.06386355 0.0001367504 -0.0024243861 0.007195261 -0.010481485 -0.002575562 0.037437954 -0.008342206
[9,] -0.01723544 0.0019649051 0.0021586501 0.025791263 -0.027044872 -0.088564825 0.041480472 0.124480361
[10,] 0.12647809 -0.0026514370 -0.0026746599 -0.029457516 0.026678117 0.089308046 -0.110959245 -0.023898859
attr(,"s.label")
[1] "s(x)"
$Ks
[1] 1
$Xs
sx_1
[1,] -0.046583614
[2,] 0.030075114
[3,] -0.033278104
[4,] 0.016367929
[5,] -0.019446637
[6,] -0.017058152
[7,] 0.081303943
[8,] 0.009561684
[9,] 0.059026510
[10,] -0.079968673
attr(,"smcols")
attr(,"smcols")[[1]]
[1] 1
attr(,"bylevels")
attr(,"bylevels")$`s(x)`
NULL
$prior_only
[1] 0
attr(,"class")
[1] "standata" "list"
And, the Stan code call stancode <- make_stancode(y ~ 0 + Intercept + s(x), data=d)
looks like:
// generated with brms 2.17.0
functions {
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
// data for splines
int Ks; // number of linear effects
matrix[N, Ks] Xs; // design matrix for the linear effects
// data for spline s(x)
int nb_1; // number of bases
int knots_1[nb_1]; // number of knots
// basis function matrices
matrix[N, knots_1[1]] Zs_1_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
}
parameters {
vector[K] b; // population-level effects
vector[Ks] bs; // spline coefficients
// parameters for spline s(x)
// standarized spline coefficients
vector[knots_1[1]] zs_1_1;
real<lower=0> sds_1_1; // standard deviations of spline coefficients
real<lower=0> sigma; // dispersion parameter
}
transformed parameters {
// actual spline coefficients
vector[knots_1[1]] s_1_1;
real lprior = 0; // prior contributions to the log posterior
// compute actual spline coefficients
s_1_1 = sds_1_1 * zs_1_1;
lprior += student_t_lpdf(sds_1_1 | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
lprior += student_t_lpdf(sigma | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = rep_vector(0.0, N) + Xs * bs + Zs_1_1 * s_1_1;
target += normal_id_glm_lpdf(Y | X, mu, b, sigma);
}
// priors including constants
target += lprior;
target += std_normal_lpdf(zs_1_1);
}
generated quantities {
}
It’s then helpful to compare the output of the same data and code in rstan to that when using brms.
Here is rstan:
m1 <- stan(model_code = stancode, data = standata,
chains = 1, iter = 2000, warmup = 1000)
m1
And here is brms:
So the formula for mu is vector[N] mu = rep_vector(0.0, N) + Xs * bs + Zs_1_1 * s_1_1;
where Zs_1_1
is an N by number of knots matrix that is in the data block, and s_1_1
is the vector of actual spline coefficients from the non-centered parameterization in the transformed parameters block. The sx_1
in the brms results is the same as bs[1]
in the rstan results. Xs
is a vector of length N. The Stan code behind the brms model seems pretty much like your typical implementation of varying effects using non-centered parameterization, but I the part that seems obscure is where do Xs
and Zs_1_1
come from…this is something from mgcv that is passed to Stan data, which would require some digging on that side.