Hi,
I am working on a Hierarchical Summary ROC (HSROC) analysis model for a meta-analysis of diagnostic tests. It is my intention to make use of the LOO package for model diagnostics and model selection. In order to have better pareto statistics, it is my understanding that the random effect terms are best integrated out see random effects and integrated LOO example
My question is regarding the integration of 2 (or more) random effect terms in a model. Does anyone have an example they can share? I have put my current model code below with an example dataset. If anyone has any feedback, I would be very grateful.
Apologies if this has been discussed elsewhere. I was unsuccessful in my search.
Many thanks!
functions {
// Second integral
real integrand_theta(
real theta,
real notused,
array[] real pars,
data array[] real notused2,
array[] int n_tp_fp_tn_fn) {
real p;
real alpha = pars[1];
real beta = pars[2];
real theta_mean = pars[3];
real sigma_theta = pars[4];
real mu_alpha = pars[5];
real sigma_alpha = pars[6];
int tp = n_tp_fp_tn_fn[1];
int fp = n_tp_fp_tn_fn[2];
int tn = n_tp_fp_tn_fn[3];
int fn = n_tp_fp_tn_fn[4];
int n_pos = tp + fn;
int n_neg = tn + fp;
real logit_tp = ((theta_mean + theta) + 0.5*(alpha + mu_alpha))/exp(beta/2);
real logit_fp = ((theta_mean + theta) - 0.5*(alpha + mu_alpha))*exp(beta/2);
p = exp(
normal_lpdf(theta | 0, sigma_theta) +
normal_lpdf(alpha | 0, sigma_alpha) +
binomial_logit_lpmf(tp | n_pos, logit_tp) +
binomial_logit_lpmf(fp | n_neg, logit_fp)
);
return (p);
}
// Top level integral
real integrand_alpha_theta(
real alpha,
real notused,
array[] real pars,
data array[] real notused2,
array[] int n_tp_fp_tn_fn
) {
real beta = pars[1];
real theta_mean = pars[2];
real sigma_theta = pars[3];
real alpha_mean = pars[4];
real sigma_alpha = pars[5];
real p;
p = integrate_1d(
integrand_theta,
-6*sigma_theta,
6*sigma_theta,
append_array(
append_array(
append_array({alpha}, {beta}),
append_array({theta_mean}, {sigma_theta})
),
append_array(
{alpha_mean},
{sigma_alpha}
)
),
notused2,
n_tp_fp_tn_fn,
1e-4
);
return (p);
}
}
data {
int<lower=0> N_Studies; // Number of studies included
array[N_Studies, 4] int TP_FP_TN_FN;
array[N_Studies] real notused;
}
transformed data {
array[N_Studies] int TP; // Number identified as True Positive
array[N_Studies] int FP; // Number identified as False Positive
array[N_Studies] int FN; // Number identified as False Negative
array[N_Studies] int TN; // Number identified as True Negative
array[N_Studies] int N_Pos; // Number who are Positive
array[N_Studies] int N_Neg; // Number who are Negative
TP = to_array_1d(TP_FP_TN_FN[,1]);
FP = to_array_1d(TP_FP_TN_FN[,2]);
TN = to_array_1d(TP_FP_TN_FN[,3]);
FN = to_array_1d(TP_FP_TN_FN[,4]);
for (i in 1:N_Studies) {
N_Pos[i] = TP[i] + FN[i];
N_Neg[i] = TN[i] + FP[i];
}
}
parameters {
vector[N_Studies] theta_raw;
vector[N_Studies] alpha_raw;
real mu_theta;
real mu_alpha;
real beta;
real<lower=0> sigma_theta;
real<lower=0> sigma_alpha;
}
transformed parameters {
array[N_Studies] real logit_tp;
array[N_Studies] real logit_fp;
vector[N_Studies] theta;
vector[N_Studies] alpha;
theta = mu_theta + sigma_theta * theta_raw;
alpha = mu_alpha + sigma_alpha * alpha_raw;
for (i in 1:N_Studies) {
logit_tp[i] = (theta[i] + 0.5*alpha[i])/exp(beta/2);
logit_fp[i] = (theta[i] - 0.5*alpha[i])*exp(beta/2);
}
}
model {
theta_raw ~ std_normal();
alpha_raw ~ std_normal();
TP ~ binomial_logit(N_Pos, logit_tp);
FP ~ binomial_logit(N_Neg, logit_fp);
}
generated quantities {
array[N_Studies] real TP_rep;
array[N_Studies] real FP_rep;
vector[N_Studies] log_lik;
TP_rep = binomial_rng(N_Pos, inv_logit(logit_tp));
FP_rep = binomial_rng(N_Neg, inv_logit(logit_fp));
// Commented out so the example will run smoothly
// for (i in 1:N_Studies) {
// // Generate the log_lik for the LOO estimates
// log_lik[i] = log(
// integrate_1d(
// integrand_alpha_theta,
// -6*sigma_alpha,
// 6*sigma_alpha,
// append_array(
// append_array(
// append_array({beta}, {mu_theta}),
// append_array({sigma_theta}, {mu_alpha})
// ),
// {sigma_alpha}
// ),
// notused,
// to_array_1d(TP_FP_TN_FN[i,]),
// 1e-4
// )
// );
// }
}
library(cmstanr)
mod <- cmdstanr::cmdstan_model(stan_file = "hsroc.stan")
TP <- c(9, 3, 3, 3, 0, 7, 12, 23, 8, 16)
FP <- c(2, 6, 2, 11, 0, 2, 4, 5, 5, 2)
TN <- c(44, 32, 16, 44, 15, 167, 29, 230, 53, 22)
FN <- c(2, 5, 1, 12, 5, 22, 4, 14, 5, 2)
TP_FP_TN_FN <- matrix(c(
TP, FP, TN, FN
), ncol = 4)
stan_data <- list(
N_Studies = nrow(TP_FP_TN_FN),
TP_FP_TN_FN = TP_FP_TN_FN,
notused = rep(0, nrow(TP_FP_TN_FN))
)
hsroc_stan_fit <- mod$sample(
data = stan_data,
seed = 2108013527,
chains = 4,
iter_sampling = 1000,
thin = 1,
iter_warmup = 500,
save_warmup = TRUE,
parallel_chains = 4
)