Hi,
I am running the following model. The estimates seem fine, so the model appears to “work” in that sense. However, the estimation is extremely slow. In total there are 15 studies (see data below) and I am just subsetting the data on the first 4 to do test runs. However, even with just these 4 studies it takes ~45 minutes to do 500 iterations.
I am wondering if anybody has any ideas for how to speed this up, or whether these models are inherently this slow.
I am not aware of these models being used much in the literature except for this paper; however, they used SAS and do not explicitly mention computation times.
data {
  int<lower=2> nt; // number of tests
  int<lower=0> NS; // total # of studies
  int<lower=1> n[NS]; // number of individuals in each study
  int r[NS,4]; // 2x2 table for each study: counts of (T1,T2) = (0,0),(1,0),(0,1),(1,1)
}
transformed data {
  // Response pattern of each of the 4 cells of the 2x2 table.
  // Individuals within a cell are exchangeable: each contributes an
  // identical likelihood term. So the likelihood depends only on the
  // cell counts r[s, ] -- no need to expand to one row per individual.
  // This turns the O(sum(n)) inner loop into an O(4) one with an
  // exactly identical target (sufficient-statistic aggregation).
  int res1[4] = {0, 1, 0, 1}; // test-1 result in cells 1..4
  int res2[4] = {0, 0, 1, 1}; // test-2 result in cells 1..4
}
parameters {
  vector[nt] a11_raw; // probit-scale accuracy, diseased class
  vector[nt] a10_raw; // probit-scale accuracy, non-diseased class
  real<lower=0,upper=1> p[NS]; // per-study prevalence (implicit uniform(0,1) prior)
}
transformed parameters {
  // Identity copies kept so generated quantities / downstream code that
  // reference a11/a10 keep working unchanged.
  vector[nt] a11 = a11_raw;
  vector[nt] a10 = a10_raw;
}
model {
  // Hoisted out of the study loop: these were previously recomputed for
  // every individual on every leapfrog step.
  vector[nt] prob_pos_dis = Phi_approx(a11);     // P(test + | diseased)  = Se
  vector[nt] prob_pos_nondis = Phi_approx(-a10); // P(test + | non-diseased) = 1 - Sp

  a11_raw ~ std_normal();
  a10_raw ~ std_normal();

  for (s in 1:NS) {
    for (c in 1:4) {
      real lps[2];
      // log prob of this response pattern conditional on non-diseased
      lps[1] = bernoulli_lpmf(0 | p[s])
             + bernoulli_lpmf(res1[c] | prob_pos_nondis[1])
             + bernoulli_lpmf(res2[c] | prob_pos_nondis[2]);
      // log prob of this response pattern conditional on diseased
      lps[2] = bernoulli_lpmf(1 | p[s])
             + bernoulli_lpmf(res1[c] | prob_pos_dis[1])
             + bernoulli_lpmf(res2[c] | prob_pos_dis[2]);
      // marginalize over latent disease status; all r[s,c] individuals
      // in this cell contribute the same term, so weight by the count
      target += r[s,c] * log_sum_exp(lps);
    }
  }
}
generated quantities {
  vector<lower=0,upper=1>[nt] Se;
  vector<lower=0,upper=1>[nt] Sp;
  vector<lower=0,upper=1>[nt] fp;
  vector<lower=0,upper=1>[nt] fn;
  for (j in 1:nt) {
    // overall test accuracy estimates, for each test
    Se[j] = Phi_approx(a11[j]);
    fn[j] = 1 - Se[j];
    Sp[j] = Phi_approx(a10[j]);
    fp[j] = 1 - Sp[j];
  }
}
The data and the code to run the model from R are:
# Number of studies in the full meta-analysis dataset.
num <- 15

# Which test index was used as test 1 / test 2 in each study.
# NOTE: renamed from `T` -- `T` masks the built-in TRUE and is a classic
# R footgun.
test1 <- c(1, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 1, 3, 3)
test2 <- rep(4L, times = num)
tests <- cbind(t1 = test1, t2 = test2)

# 2x2 cell counts per study.
r1 <- c(97, 1, 20, 14, 93, 31, 132, 305, 17, 44, 54, 39, 64, 7, 25)  # tp
r2 <- c(20, 0, 3, 1, 30, 6, 23, 9, 19, 5, 22, 4, 0, 4, 44)           # fn
r3 <- c(14, 9, 2, 44, 183, 3, 54, 98, 31, 61, 63, 536, 612, 31, 3)   # fp. gs=0, ind=1
r4 <- c(297, 45, 231, 285, 1845, 248, 226, 256, 580, 552, 980, 227,
        2051, 98, 170)                                               # tn, ind=0

# Study sizes: vectorized sum, no element-by-element loop needed.
ns <- r1 + r2 + r3 + r4

# Assemble per-study data (renamed from `data`, which masks utils::data).
# Keep ordering as-is; use dplyr::arrange(t1) here if ordering by test.
study_data <- data.frame(r1, r2, r3, r4, ns,
                         t1 = tests[, 1], t2 = tests[, 2])

# Count matrix with one row per study, columns (tp, fn, fp, tn).
r <- as.matrix(study_data[, c("r1", "r2", "r3", "r4")])
dimnames(r) <- NULL

# Diseased / non-diseased totals per study (used by some model variants).
pos <- r1 + r2
neg <- r3 + r4

# Subset to the first 4 studies for quick test runs; use 1:num for the
# full dataset.
keep <- 1:4
# data24 <- list(r = r, n = ns, NS = num, pos = pos, neg = neg,
#                T = tests[, 1], num_ref = 3, nt = 2)
data24 <- list(r = r[keep, ], n = ns[keep], NS = length(keep),
               num_ref = 3, nt = 2)
data24

meta_model <- stan(
  file = "latent_trait_2_tests_meta_analysis.stan",
  data = data24,
  iter = 500,
  chains = 1, # verbose = TRUE,
  # init = list(init, init, init, init),
  control = list(adapt_delta = 0.8, max_treedepth = 10)
)