I am trying to understand the stan model (model_simple) generated by brms in the vignette (https://cran.r-project.org/web/packages/brms/vignettes/brms_phylogenetics.html). How are certain data variables (predictors) generated by brms is not completely clear to me. Two questions are listed below the code.
Code from the vignette:
phylo <- ape::read.nexus("https://paul-buerkner.github.io/data/phylo.nex")
data_simple <- read.table(
"https://paul-buerkner.github.io/data/data_simple.txt",
header = TRUE
)
A <- ape::vcv.phylo(phylo)
model_simple <- brm(
phen ~ cofactor + (1|gr(phylo, cov = A)),
data = data_simple,
family = gaussian(),
data2 = list(A = A),
prior = c(
prior(normal(0, 10), "b"),
prior(normal(0, 50), "Intercept"),
prior(student_t(3, 0, 20), "sd"),
prior(student_t(3, 0, 20), "sigma")
),
sample_prior = TRUE, chains = 2, cores = 2,
iter = 4000, warmup = 1000
)
model_simple$model
// generated with brms 2.14.0
functions {
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
int<lower=1> J_1[N]; // grouping indicator per observation
matrix[N_1, N_1] Lcov_1; // cholesky factor of known covariance matrix
// group-level predictor values
vector[N] Z_1_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
real Intercept; // temporary intercept for centered predictors
real<lower=0> sigma; // residual SD
vector<lower=0>[M_1] sd_1; // group-level standard deviations
vector[N_1] z_1[M_1]; // standardized group-level effects
}
transformed parameters {
vector[N_1] r_1_1; // actual group-level effects
r_1_1 = (sd_1[1] * (Lcov_1 * z_1[1]));
}
model {
// likelihood including all constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = Intercept + rep_vector(0.0, N);
for (n in 1:N) {
// add more terms to the linear predictor
mu[n] += r_1_1[J_1[n]] * Z_1_1[n];
}
target += normal_id_glm_lpdf(Y | Xc, mu, b, sigma);
}
// priors including all constants
target += normal_lpdf(b | 0, 10);
target += normal_lpdf(Intercept | 0, 50);
target += student_t_lpdf(sigma | 3, 0, 20)
- 1 * student_t_lccdf(0 | 3, 0, 20);
target += student_t_lpdf(sd_1 | 3, 0, 20)
- 1 * student_t_lccdf(0 | 3, 0, 20);
target += std_normal_lpdf(z_1[1]);
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// additionally draw samples from priors
real prior_b = normal_rng(0,10);
real prior_Intercept = normal_rng(0,50);
real prior_sigma = student_t_rng(3,0,20);
real prior_sd_1 = student_t_rng(3,0,20);
// use rejection sampling for truncated priors
while (prior_sigma < 0) {
prior_sigma = student_t_rng(3,0,20);
}
while (prior_sd_1 < 0) {
prior_sd_1 = student_t_rng(3,0,20);
}
}
Question 1: what is contained in the vector Z_1_1 (how is this vector generated)?
Question 2: is it possible to extract the data from the data and transformed data from brms output (or stan model in general)?