Hi all,
I having a binomial generalized linear mixed effects model that I have written in STAN, but it is incredibly slow compared to RSTANARM. I would like to write everything in STAN proper, but need some help optimizing the model. I think that the loops are killing me, but not sure how to correct. Here is reproducible code. First, the data and the RSTANARM code.
library(lme4)
library(rstan)
library(rstanarm)
hdp <- read.csv("https://stats.idre.ucla.edu/stat/data/hdp.csv")
hdp <- within(hdp, {
Married <- factor(Married, levels = 0:1, labels = c("no", "yes"))
DID <- factor(DID)
HID <- factor(HID)
})
m <- glmer(remission ~ IL6 + CRP + CancerStage + LengthofStay + Experience +
(1 | DID), data = hdp, family = binomial, control = glmerControl(optimizer = "bobyqa"),
nAGQ = 10)
# print the mod results without correlations among fixed effects
print(m, corr = FALSE)
m_stan= stan_glmer(remission ~ IL6 + CRP + CancerStage + LengthofStay + Experience +
(1 | DID),family=binomial,data=hdp)
Now, here is the STAN code that I have written:
binomCode = 'data{
int N; // number of data points
int remission[N]; // result
int NF; // rows in matrix
int DID[N]; // group effect
int N_DID; // number of groups
matrix [N,NF] Xmat; // design matrix
}
parameters{
vector[NF] beta; //vector of betas
real vary_DID_index[N_DID];
real sigma_DID;
}
model{
vector[N] vary;
vector[N] lm;
// Priors
beta ~ normal( 0 , 100 );
sigma_DID ~ uniform( 0 , 100 );
// Varying effects
for ( j in 1:N_DID ) vary_DID_index[j] ~ normal( 0 , sigma_DID );
// Fixed effects
for ( i in 1:N ) {
vary[i] = vary_DID_index[DID[i]];
lm[i] = vary[i] + Xmat[i]*beta;
remission[i] ~ bernoulli_logit(lm[i]);
}
}'
# now without the crutch
Xmat = model.matrix(m)
## MCMC settings
ni <- 5000
nt <- 1
nb <- 2000
nc <- 3
remission = as.numeric(hdp$remission)
N = nrow(Xmat)
NF = ncol(Xmat)
DID = as.numeric(hdp$DID)
N_DID = length(levels(hdp$DID))
data.list = list(N = N, remission = remission, NF = NF, DID = DID, N_DID = N_DID,Xmat = Xmat)
m2_stan = stan("binomCode",data=data.list,chains = nc,thin = nt,iter = ni, warmup = nb,
seed = 1)
print(m)
print(m_stan)
print(m2_stan, pars=c("beta[1]","beta[2]","beta[3]","beta[4]","beta[5]","beta[6]","beta[7]","beta[8]"))
I get the same results, but my code is way too slow. Any help would be appreciated.