As I should have done in the initial post, here is a fully reproducible example:
Compile model:
library(cmdstanr)
library(tidyverse)
library(brms)
ncores <- parallel::detectCores()  # used below by loo() and brm()

stan_file_variables <- write_stan_file("
functions {
  /* compute correlated group-level effects
   * Args:
   *   z: matrix of unscaled group-level effects
   *   SD: vector of standard deviation parameters
   *   L: cholesky factor correlation matrix
   * Returns:
   *   matrix of scaled group-level effects
   */
  matrix scale_r_cor(matrix z, vector SD, matrix L) {
    // r is stored in another dimension order than z
    return transpose(diag_pre_multiply(SD, L) * z);
  }
}
data {
  int<lower=1> N;  // total number of observations
  array[N] int Y1;  // response variable
  array[N] int Y2;  // response variable
  array[N] int Y3;  // response variable
  array[N] int<lower=1> J;  // grouping indicator for subjects
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  vector[N] Z_Int;
  vector[N] Z_X2;
  vector[N] Z_X3;
  vector[N] Z_X4;
  // data for group-level effects of subject
  int<lower=1> N_subj;  // number of subjects
  int<lower=1> M;  // number of coefficients per subject
  int<lower=1> NC;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
parameters {
  vector[K] b_y1;  // population-level effects
  vector[K] b_y2;  // population-level effects
  vector[K] b_y3;  // population-level effects
  vector<lower=0>[M] sd_1;  // group-level standard deviations
  matrix[M, N_subj] z;  // standardized group-level effects
  cholesky_factor_corr[M] L;  // cholesky factor of correlation matrix
}
transformed parameters {
  matrix[N_subj, M] r;  // actual group-level effects
  // using vectors speeds up indexing in loops
  vector[N_subj] r_y1_x1;
  vector[N_subj] r_y1_x2;
  vector[N_subj] r_y1_x3;
  vector[N_subj] r_y1_x4;
  vector[N_subj] r_y2_x1;
  vector[N_subj] r_y2_x2;
  vector[N_subj] r_y2_x3;
  vector[N_subj] r_y2_x4;
  vector[N_subj] r_y3_x1;
  vector[N_subj] r_y3_x2;
  vector[N_subj] r_y3_x3;
  vector[N_subj] r_y3_x4;
  // compute actual group-level effects
  r = scale_r_cor(z, sd_1, L);
  r_y1_x1 = r[ : , 1];
  r_y1_x2 = r[ : , 2];
  r_y1_x3 = r[ : , 3];
  r_y1_x4 = r[ : , 4];
  r_y2_x1 = r[ : , 5];
  r_y2_x2 = r[ : , 6];
  r_y2_x3 = r[ : , 7];
  r_y2_x4 = r[ : , 8];
  r_y3_x1 = r[ : , 9];
  r_y3_x2 = r[ : , 10];
  r_y3_x3 = r[ : , 11];
  r_y3_x4 = r[ : , 12];
}
model {
  real lprior = 0;  // prior contributions to the log posterior
  // likelihood not including constants
  if (!prior_only) {
    // initialize linear predictor terms
    vector[N] mu_y1 = rep_vector(0.0, N);
    vector[N] mu_y2 = rep_vector(0.0, N);
    vector[N] mu_y3 = rep_vector(0.0, N);
    // add group-level terms to the linear predictors
    for (n in 1:N) {
      mu_y1[n] += r_y1_x1[J[n]] * Z_Int[n] + r_y1_x2[J[n]] * Z_X2[n]
                  + r_y1_x3[J[n]] * Z_X3[n] + r_y1_x4[J[n]] * Z_X4[n];
      mu_y2[n] += r_y2_x1[J[n]] * Z_Int[n] + r_y2_x2[J[n]] * Z_X2[n]
                  + r_y2_x3[J[n]] * Z_X3[n] + r_y2_x4[J[n]] * Z_X4[n];
      mu_y3[n] += r_y3_x1[J[n]] * Z_Int[n] + r_y3_x2[J[n]] * Z_X2[n]
                  + r_y3_x3[J[n]] * Z_X3[n] + r_y3_x4[J[n]] * Z_X4[n];
    }
    target += bernoulli_logit_glm_lupmf(Y1 | X, mu_y1, b_y1);
    target += bernoulli_logit_glm_lupmf(Y2 | X, mu_y2, b_y2);
    target += bernoulli_logit_glm_lupmf(Y3 | X, mu_y3, b_y3);
  }
  // priors not including constants
  lprior += normal_lupdf(b_y1[1] | 0, 1);
  lprior += normal_lupdf(b_y1[2] | 0, 1);
  lprior += normal_lupdf(b_y1[3] | 0, 1);
  lprior += normal_lupdf(b_y1[4] | 0, 1);
  lprior += normal_lupdf(b_y1[5] | 1, 1);
  lprior += normal_lupdf(b_y1[6] | 0, 1);
  lprior += normal_lupdf(b_y2 | 0, 1);
  lprior += normal_lupdf(b_y3 | 0, 1);
  lprior += normal_lupdf(sd_1 | 1, 1);
  lprior += lkj_corr_cholesky_lupdf(L | 1);
  target += lprior;
  target += std_normal_lupdf(to_vector(z));
}
generated quantities {
  vector[N] log_lik1;
  vector[N] log_lik2;
  vector[N] log_lik3;
  vector[N] log_lik;
  // for predictions on the observed design matrix
  vector[N] mu_y1_pred;
  vector[N] mu_y2_pred;
  vector[N] mu_y3_pred;
  vector[N] Xb_y1 = X * b_y1;
  vector[N] Xb_y2 = X * b_y2;
  vector[N] Xb_y3 = X * b_y3;
  vector[N] prob_y1;
  vector[N] prob_y2;
  vector[N] prob_y3;
  array[N] int y1_pred;
  array[N] int y2_pred;
  array[N] int y3_pred;
  // compute group-level correlations
  corr_matrix[M] Cor_1 = multiply_lower_tri_self_transpose(L);
  vector<lower=-1, upper=1>[NC] cor_1;
  // extract upper diagonal of correlation matrix
  for (k in 1:M) {
    for (j in 1:(k - 1)) {
      cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
    }
  }
  // predictions from observed data
  for (n in 1:N) {
    mu_y1_pred[n] = r_y1_x1[J[n]] * Z_Int[n] + r_y1_x2[J[n]] * Z_X2[n]
                    + r_y1_x3[J[n]] * Z_X3[n] + r_y1_x4[J[n]] * Z_X4[n];
    mu_y2_pred[n] = r_y2_x1[J[n]] * Z_Int[n] + r_y2_x2[J[n]] * Z_X2[n]
                    + r_y2_x3[J[n]] * Z_X3[n] + r_y2_x4[J[n]] * Z_X4[n];
    mu_y3_pred[n] = r_y3_x1[J[n]] * Z_Int[n] + r_y3_x2[J[n]] * Z_X2[n]
                    + r_y3_x3[J[n]] * Z_X3[n] + r_y3_x4[J[n]] * Z_X4[n];
    // predicted probabilities
    prob_y1[n] = inv_logit(mu_y1_pred[n] + Xb_y1[n]);
    prob_y2[n] = inv_logit(mu_y2_pred[n] + Xb_y2[n]);
    prob_y3[n] = inv_logit(mu_y3_pred[n] + Xb_y3[n]);
    // log-likelihoods
    log_lik1[n] = bernoulli_logit_glm_lpmf(Y1[n] | X, mu_y1_pred[n], b_y1);
    log_lik2[n] = bernoulli_logit_glm_lpmf(Y2[n] | X, mu_y2_pred[n], b_y2);
    log_lik3[n] = bernoulli_logit_glm_lpmf(Y3[n] | X, mu_y3_pred[n], b_y3);
    log_lik[n] = log_lik1[n] + log_lik2[n] + log_lik3[n];
  }
  // predictions
  y1_pred = bernoulli_logit_glm_rng(X, mu_y1_pred, b_y1);
  y2_pred = bernoulli_logit_glm_rng(X, mu_y2_pred, b_y2);
  y3_pred = bernoulli_logit_glm_rng(X, mu_y3_pred, b_y3);
}
")
Ex_Mod <- cmdstan_model(stan_file_variables)
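As a quick check that write_stan_file() captured the program intact, the compiled model object can echo its own source:

# Echo the Stan source held by the compiled model
Ex_Mod$print()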
Create data list from .csv:
Ex_Data.csv (88.0 KB)
Ex_Data <- read.csv("Ex_Data.csv")
## Create data list ----
Ex_Stan_Data <- lst(N = nrow(Ex_Data),
                    Y1 = Ex_Data$Y1,
                    Y2 = Ex_Data$Y2,
                    Y3 = Ex_Data$Y3,
                    X = model.matrix(~ 0 + X1 + X2 + X3 + X4,
                                     data = Ex_Data),
                    K = ncol(X),
                    Z_Int = rep(1, nrow(Ex_Data)),
                    Z_X2 = Ex_Data$X2,
                    Z_X3 = Ex_Data$X3,
                    Z_X4 = Ex_Data$X4,
                    J = Ex_Data %>%
                      mutate(subject = as.numeric(factor(subject))) %>%
                      pull(subject),
                    N_subj = length(unique(Ex_Data$subject)),
                    M = 4 * 3,  # coefficients per subject * number of outcomes
                    NC = (4 * 3) * ((4 * 3) - 1) / 2,  # M * (M - 1) / 2
                    prior_only = 0)
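A cheap sanity check that the list lines up with the declared data block before sampling:

# Dimensions must match the Stan data declarations
stopifnot(
  length(Ex_Stan_Data$Y1) == Ex_Stan_Data$N,
  ncol(Ex_Stan_Data$X) == Ex_Stan_Data$K,
  max(Ex_Stan_Data$J) == Ex_Stan_Data$N_subj
)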
Sample from model:
# Sample from compiled model ----
Ex_Fit <- Ex_Mod$sample(data = Ex_Stan_Data,
                        iter_warmup = 1000,
                        iter_sampling = 1000,
                        chains = 4,
                        max_treedepth = 12,
                        adapt_delta = 0.99)
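Given the high adapt_delta, it's worth confirming the sampler itself was clean before looking at loo; cmdstanr's diagnostic_summary() reports divergences, treedepth saturation, and E-BFMI:

# Sampler diagnostics: divergences, max-treedepth hits, E-BFMI
Ex_Fit$diagnostic_summary()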
PPC plot:
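For reference, a plot like this can be produced with bayesplot from the y*_pred draws in generated quantities; a minimal sketch for Y1 (same pattern for Y2 and Y3):

library(bayesplot)
# Posterior predictive replicates for Y1, one row per draw
y1_rep <- Ex_Fit$draws("y1_pred", format = "draws_matrix")
# ppc_bars() suits a 0/1 outcome; subset draws to keep the plot light
ppc_bars(y = Ex_Stan_Data$Y1, yrep = y1_rep[1:200, ])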
Loo results:
Ex_Fit$loo(cores = ncores)

Computed from 4000 by 1296 log-likelihood matrix

           Estimate       SE
elpd_loo -6179529.1  53762.4
p_loo     4265220.3  40488.5
looic    12359058.2 107524.8
------
Monte Carlo SE of elpd_loo is NA.

Pareto k diagnostic values:
                         Count  Pct.   Min. n_eff
(-Inf, 0.5]  (good)          0   0.0%  <NA>
(0.5, 0.7]   (ok)            0   0.0%  <NA>
(0.7, 1]     (bad)           0   0.0%  <NA>
(1, Inf)     (very bad)   1292 100.0%  0

See help('pareto-k-diagnostic') for details
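For reference, Ex_Fit$loo() is essentially running loo on the log_lik variable from generated quantities, so the same numbers come from extracting the draws by hand:

# Equivalent manual route: pull log_lik and call loo directly
log_lik_mat <- Ex_Fit$draws("log_lik", format = "draws_matrix")
loo::loo(log_lik_mat)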
The PPC plot looks great, but the loo results look terrible. The same model fit in brms gives very good loo results:
fmla_y1 <- bf(Y1 ~ 0 + X1 + X2 + X3 + X4 +
                (1 + X2 + X3 + X4 | p | subject),
              center = FALSE)
fmla_y2 <- bf(Y2 ~ 0 + X1 + X2 + X3 + X4 +
                (1 + X2 + X3 + X4 | p | subject),
              center = FALSE)
fmla_y3 <- bf(Y3 ~ 0 + X1 + X2 + X3 + X4 +
                (1 + X2 + X3 + X4 | p | subject),
              center = FALSE)
priors <- c(
  set_prior("normal(0,1)", class = "b", resp = "Y1"),
  set_prior("normal(1,1)", class = "b", coef = "X2", resp = "Y1"),
  set_prior("normal(1,1)", class = "sd", resp = "Y1"),
  set_prior("normal(0,1)", class = "b", resp = "Y2"),
  set_prior("normal(1,1)", class = "sd", resp = "Y2"),
  set_prior("normal(0,1)", class = "b", resp = "Y3"),
  set_prior("normal(1,1)", class = "sd", resp = "Y3")
)
brm_mod <- brm(fmla_y1 + fmla_y2 + fmla_y3,
               family = bernoulli(),
               data = Ex_Data,
               prior = priors,
               init = 0,
               iter = 2000,
               warmup = 1000,
               chains = 4,
               cores = ncores,
               normalize = FALSE,
               backend = "cmdstanr",
               control = list(max_treedepth = 12,
                              adapt_delta = 0.9))
loo(brm_mod, cores = ncores)

Computed from 4000 by 1296 log-likelihood matrix

         Estimate   SE
elpd_loo  -2297.4 37.3
p_loo        74.8  2.1
looic      4594.9 74.5
------
Monte Carlo SE of elpd_loo is 0.1.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
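When the two implementations disagree this badly, one way to narrow down the difference is to diff the Stan program brms actually generates against the hand-written one above (keeping in mind that brms computes pointwise log_lik in R via log_lik() rather than in generated quantities):

# Print the Stan program brms generated for the multivariate model
cat(brms::stancode(brm_mod))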