edit2: OK i’m an idiot - somehow between file edits I accidently removed priors for the standard deviation parameters (d_sd and d_sd) - adding these back in solves the problem:
edit: I think I know what the problem is - I need to restrict the range of “bound_d” and “bound_nd” - the problem is i’m not sure how to implement this, since you can’t set constraints on locally declared variables, and it also relies on prev_d and prev_nd which is updated locally.
The reason why I think this will fix the issue is that the other version I have based off of the Stan manual parameterization (which works but is very slow), I also had similar looking trace plot unless I fixed the latent vectors (denoted as z in the manual) to have both upper and lower bounds - so rather than just truncating them at 0 i’d also set an upper bound of 10 for the latent vec. corresponding to Y=1 and lower bound of -10 for the ones corresponding to Y=0 observations. This works and it doesn’t negatively affect inference for our context, since it would still allow subjects to have a sensitivity/Specificity of up to Phi(10) = 1. I think that this has something to do with the numerical stability of Phi / normal CDFs
OK, now something bizarre happens - I get very good fit to the data / posterior predictive, but many divergent transitions and traceplots which look like this:
Code and data is below
data {
real x;
real y1; // SD for between-study heterogeneity prior
int<lower=1> nt; // number of tests
int<lower=1> NS; // total # of studies
int<lower=1> ns[NS]; // number of individuals in each study
int<lower=0> y[max(ns),nt, NS]; // N individuals and n1 tests, NS studies
int r[NS,4];
}
parameters {
ordered[nt] a1_m;
ordered[nt] a2_m;
vector<lower=0>[nt] d_sd;
vector<lower=0>[nt] nd_sd;
vector[4] z1[NS];
real<lower=0,upper=1> p[NS];
real<lower=0,upper=1> u_d[max(ns),nt, NS]; // nuisance that absorbs inequality constraints
real<lower=0,upper=1> u_nd[max(ns),nt, NS]; // nuisance that absorbs inequality constraints
cholesky_factor_corr[nt] L_Omega_nd[NS];
cholesky_factor_corr[nt] L_Omega_d[NS];
}
transformed parameters {
vector[nt] d_m;
vector[nt] nd_m;
vector[nt] d[NS];
vector[nt] nd[NS];
d_m[1] = a1_m[2];
d_m[2] = a2_m[2];
nd_m[1] = a1_m[1];
nd_m[2] = a2_m[1];
for (s in 1:NS) {
d[s,1] = d_m[1] ;// + d_sd[1]*z1[s,1]*y1;
d[s,2] = d_m[2] + d_sd[2]*z1[s,2]*y1;
nd[s,1] = nd_m[1] ;// + nd_sd[1]*z1[s,3]*y1;
nd[s,2] = nd_m[2] + nd_sd[2]*z1[s,4]*y1;
}
}
model {
for (s in 1:NS)
z1[s,] ~ std_normal();
for (s in 1:NS) {
L_Omega_nd[s,] ~ lkj_corr_cholesky(x);
L_Omega_d[s,] ~ lkj_corr_cholesky(x); }
// sens
a1_m[2] ~ std_normal(); // ~ normal(1, 0.4);
a2_m[2] ~ std_normal(); // normal(1, 0.4);
// spec
a1_m[1] ~ std_normal(); // ~ normal(-1, 0.4);
a2_m[1] ~ std_normal(); // ~ normal(-1, 0.4);
{ // likelihood
for (s in 1:NS) {
real lp_bern0 = bernoulli_lpmf(0 | p[s]);
real lp_bern1 = bernoulli_lpmf(1 | p[s]);
for (n in 1:ns[s]) {
real lp1;
real lp0;
vector[nt] y1d;
vector[nt] y1nd;
vector[nt] z_d;
vector[nt] z_nd;
real prev_d;
real prev_nd;
prev_d = 0;
prev_nd = 0;
for (i in 1:nt) { // Phi and inv_Phi may overflow and / or be numerically inaccurate
real bound_d; // threshold at which utility = 0
real bound_nd; // threshold at which utility = 0
bound_d = Phi_approx( -(d[s,i] + prev_d) / L_Omega_d[s,i,i] );
bound_nd = Phi_approx( -(nd[s,i] + prev_nd) / L_Omega_nd[s,i,i] );
if (y[n,i,s] == 1) {
real t_d;
real t_nd;
t_d = bound_d + (1 - bound_d) * u_d[n,i,s];
z_d[i] = inv_Phi(t_d); // implies utility is positive
t_nd = bound_nd + (1 - bound_nd) * u_nd[n,i,s];
z_nd[i] = inv_Phi(t_nd); // implies utility is positive
y1d[i] = log1m(bound_d); // Jacobian adjustment
y1nd[i] = log1m(bound_nd);
}
if (y[n,i,s] == 0) {
real t_d;
real t_nd;
t_d = bound_d * u_d[n,i,s];
z_d[i] = inv_Phi(t_d); // implies utility is negative
t_nd = bound_nd * u_nd[n,i,s];
z_nd[i] = inv_Phi(t_nd); // implies utility is negative
y1d[i] = log(bound_d);
y1nd[i] = log(bound_nd);
}
if (i < nt) prev_d = L_Omega_d[s,i +1,1:i ] * head(z_d,i);
if (i < nt) prev_nd = L_Omega_nd[s,i +1,1:i ] * head(z_nd,i);
// Jacobian adjustments imply z is truncated standard normal
// thus utility --- mu + L_Omega * z --- is truncated multivariate normal
}
lp1 = sum(y1d) + lp_bern1;
lp0 = sum(y1nd) + lp_bern0;
target += log_sum_exp(lp1,lp0);
}
}
}
}
generated quantities {
vector<lower=0,upper=1>[nt] Se;
vector<lower=0,upper=1>[nt] Sp;
vector<lower=0,upper=1>[nt] se[NS];
vector<lower=0,upper=1>[nt] sp[NS];
vector<lower=0,upper=1>[nt] fp[NS];
vector[4] pr[NS];
vector[4] e[NS];
vector[4] o[NS];
vector[4] dv[NS];
vector[4] ot[NS];
real cov_d[NS];
real cov_nd[NS];
real rho2_nd[NS];
real rho2_d[NS];
real dev[NS];
real ec[NS];
real oc[NS];
real dc[NS];
real resdev;
corr_matrix[nt] Omega_d[NS];
corr_matrix[nt] Omega_nd[NS];
// overall test accuracy estimates, for each test
Se[1] = Phi_approx( d_m[1] );
Sp[1] = Phi_approx( -nd_m[1] );
Se[2] = Phi_approx( d_m[2] );
Sp[2] = Phi_approx( -nd_m[2] );
for (s in 1:NS) {
int num = 100;
vector[nt] z_d[NS, num];
vector[nt] z_nd[NS, num];
int y_hat_d[num,nt, NS];
int y_hat_nd[num,nt, NS];
real n1 = num;
real n2 = num;
Omega_d[s,1:2,1:2] = multiply_lower_tri_self_transpose(L_Omega_d[s,1:2,1:2]);
Omega_nd[s,1:2,1:2] = multiply_lower_tri_self_transpose(L_Omega_nd[s,1:2,1:2]);
// simulate predictive dist. from posterior for posterior predictive checks
for (n in 1:num) {
z_d[s,n,1:2] = multi_normal_rng(d[s,1:2], Omega_d[s,1:2, 1:2]);
z_nd[s,n,1:2] = multi_normal_rng(nd[s,1:2], Omega_nd[s, 1:2, 1:2]);
for (i in 1:2) {
if (z_d[s,n,i] > 0) {
y_hat_d[n,i,s] = 1; }
else {
y_hat_d[n,i,s] = 0; }
}
for (i in 1:2) {
if (z_nd[s,n,i] > 0) {
y_hat_nd[n,i,s] = 1; }
else {
y_hat_nd[n,i,s] = 0; }
}
}
// estimate within-study correlations for conditional dependence
rho2_d[s] = (n1*sum(to_vector(y_hat_d[,1,s]) .* to_vector(y_hat_d[,2,s])) - sum(to_vector(y_hat_d[,1,s]))*sum(to_vector(y_hat_d[,2,s])) ) / sqrt( (n1*sum(to_vector(y_hat_d[,1,s]) .* to_vector(y_hat_d[,1,s])) - sum(to_vector(y_hat_d[,1,s]))^2) * (n1*sum(to_vector(y_hat_d[,2,s]) .* to_vector(y_hat_d[,2,s])) - sum(to_vector(y_hat_d[,2,s]))^2 ) );
rho2_nd[s] = (n2*sum(to_vector(y_hat_nd[,1,s]) .* to_vector(y_hat_nd[,2,s])) - sum(to_vector(y_hat_nd[,1,s]))*sum(to_vector(y_hat_nd[,2,s])) ) / sqrt( (n2*sum(to_vector(y_hat_nd[,1,s]) .* to_vector(y_hat_nd[,1,s])) - sum(to_vector(y_hat_nd[,1,s]))^2) * (n2*sum(to_vector(y_hat_nd[,2,s]) .* to_vector(y_hat_nd[,2,s])) - sum(to_vector(y_hat_nd[,2,s]))^2 ) );
// study-specific accuracy estimates
se[s,1] = Phi_approx( d[s,1] );
sp[s,1] = Phi_approx( -nd[s,1] );
se[s,2] = Phi_approx( d[s,2] );
sp[s,2] = Phi_approx( -nd[s,2] );
fp[s,] = 1-sp[s,];
cov_d[s] = Omega_d[s,1,2]*sqrt( se[s,1]*se[s,2]*(1-se[s,1])*(1-se[s,2]) );
cov_nd[s] = Omega_nd[s,1,2]*sqrt( sp[s,1]*sp[s,2]*(1-sp[s,1])*(1-sp[s,2]) );
// construct expected/model-predicted 2-way table for each study
pr[s,1] = p[s] * ( se[s,1] * se[s,2] + cov_d[s]) + (1-p[s]) * ( fp[s,1] * fp[s,2] + cov_nd[s] );
pr[s,2] = p[s] * ( se[s,1] * (1-se[s,2]) - cov_d[s]) + (1-p[s]) * ( (fp[s,1]) * (1-fp[s,2]) - cov_nd[s] );
pr[s,3] = p[s] * ( (1-se[s,1]) * se[s,2] - cov_d[s]) + (1-p[s]) * ( (1-fp[s,1]) * fp[s,2] - cov_nd[s] );
pr[s,4] = p[s] * ( (1-se[s,1]) * (1-se[s,2]) + cov_d[s]) + (1-p[s]) * ( (1-fp[s,1]) * (1-fp[s,2]) + cov_nd[s] );
ot[s,1] = r[s,1];
ot[s,2] = r[s,2];
ot[s,3] = r[s,3];
ot[s,4] = r[s,4];
for (a in 1:4) {
ot[s,a] = r[s,a];
if (ot[s,a] != 0) {
e[s,a] = ns[s] * pr[s,a] ; // expected cell count (2x2 tables so 4)
o[s,a] = ot[s,a] / ns[s] ; //# observed prob
dv[s,a] = 2 * ot[s,a] * log(ot[s,a]/e[s,a]) ;
}
if (ot[s,a] == 0) {
e[s,a] = ns[s] * pr[s,a] ;
o[s,a] = ot[s,a] / ns[s] ;
dv[s,a] = 0;
}
}
dev[s] = sum(dv[s,]) ;
// Corr residuals (Qu et al, 1996)
ec[s] = (pr[s,1] - (pr[s,1]+pr[s,2]) * (pr[s,1]+pr[s,3]) )/ // expected correlations
sqrt((pr[s,1]+pr[s,2]) * (pr[s,1]+pr[s,3]) * (1-pr[s,1]-pr[s,2]) * (1-pr[s,1]-pr[s,3]));
oc[s] = (o[s,1] - (o[s,1]+o[s,2]) * (o[s,1]+o[s,3])) / // # observed correlations
sqrt((o[s,1]+o[s,2]) * (o[s,1]+o[s,3]) * (1-o[s,1]-o[s,2]) * (1-o[s,1]-o[s,3]));
dc[s] = oc[s] - ec[s]; // # correlation residual
}
resdev = sum(dev[]);
}
R code to run model:
num <- 15
T1 <- c(1,2,2,2,2,3,2,3,2,2,2,2,1,3,3)
T2 <- rep(4, times = 15)
T <- matrix(c(T1, T2), ncol=2, nrow=15)
r1 <- c(97, 1,20,14,93, 31, 132, 305, 17, 44, 54, 39, 64, 7, 25) ; length(r1) #tp
r2 <- c(20, 0 ,3, 1 ,30 ,6,23,9,19,5,22,4, 0, 4,44) ; length(r2) #fn
r3 <- c(14,9,2,44,183, 3,54,98,31,61,63,536, 612 , 31, 3) ; length(r3) #fp. gs=0. ind=1
r4 <- c(297, 45, 231, 285, 1845, 248, 226, 256, 580, 552, 980, 227, 2051, 98, 170) ; length(r4) # tn, ind=0, gs = 0
ns <- c()
for (i in 1:num) {ns[i] <- r1[i] + r2[i] + r3[i] + r4[i]}
# order by test
data <- data.frame(r1,r2,r3,r4, ns, t1 = T[,1], t2= T[,2]) #%>% arrange(t1)
r1 <- data$r1 ; r2 <- data$r2 ; r3 <- data$r3 ; r4 <- data$r4
r <- matrix(ncol = 4, nrow = num, c(r1,r2,r3,r4)) ; r
ns <- data$ns
data24 <-list()
data24 <- list( r = r, n = ns, NS= num ,T=data$t1, num_ref=3, nt=2)
NS=15
sum(ns) # N= 10,968
y_list <- list()
y1a <- list(length = max(ns))
y1b <- list(length = max(ns))
max <- max(ns)
for (i in 1:NS) {
y1a[[i]] = c(rep(1, r[i,1]), rep(1, r[i,2]), rep(0, r[i,3]), rep(0, r[i,4]), rep(2, max - ns[i] ))
y1b[[i]] = c(rep(1, r[i,1]), rep(0, r[i,2]), rep(1, r[i,3]), rep(0, r[i,4]), rep(2, max - ns[i] ))
y_list[[i]] = matrix(ncol = 2, c(y1a[[i]] , y1b[[i]] ))
}
y = array(data= unlist(y_list), dim = c( max(ns), 2, NS))
file <- file.path(file = "2_tests_meta_analysis.stan")
mod <- cmdstan_model(file)
init = list(a1_m = c(-2,0.5),
a2_m = c(-1,0.5))
meta_model2 <- mod$sample(
data = list(m=0, x=5, y1=0, NS=4, ns=ns[1:4], y=y[1:428,,1:4], nt = 2, r = r[1:4,] ), # to run with just first 4 studies
seed = 1,
chains = 1,
parallel_chains = 1,
iter_warmup = 200,
iter_sampling = 200,
refresh = 10,
#init=list(init),
adapt_delta = 0.80,
max_treedepth = 10)
meta_model2r <- rstan::read_stan_csv(meta_model2$output_files())