Hi Stan expert group, I am new to Stan and is facing some serious problems while estimating parameters using MCMC approach for Multilevel threshold autoregressive model. I have shared both R code and associated Stan code. Every time after sampling, my results shows that at least 10 or more divergent issues with treedepth issues along with R hat is too low or Number of efficient samples are too low (between 2 to 5). I do not know why this happen, could you please help me out in this regard.
# the R code starts with creating function for data of MLTAR model
createdata_mltar <- function(
seed = 12345678,
Npat = 50,
preslope.truemean = 0.2,
preslope.truesd = 0.1,
postslope.truemean = 0.8,
postslope.truesd = 0.1,
tau.truemean = 15 ,
tau.truesd = 2 ,
betatau.lowerbound = 9 ,
betatau.upperbound = 25 ,
error.truesd = 2,
corr.preslope.postslope = 0.5,
corr.preslope.tau = 0.3,
corr.postslope.tau = 0.2)
{
set.seed(seed)
# Number of measurements per patient
Ntimepoints<- 100
id.long <- rep(1:Npat, times = Ntimepoints)
# Measurement errors
error <- rnorm(n = length(id.long), mean = 0, sd = error.truesd)
# create a dataframe
dat1 <- data.frame(id=id.long, error=error)
# True random effects (correlated): intercept, preslope, postslope, knot point
id.short <- rep(1:Npat)
mu <- c(preslope.truemean,
postslope.truemean,
tau.truemean)
var1 <- preslope.truesd ^ 2
var2 <- postslope.truesd ^ 2
var3 <- tau.truesd ^ 2
cov12 <- corr.preslope.postslope * preslope.truesd * postslope.truesd
cov13 <- corr.preslope.tau * preslope.truesd * tau.truesd
cov23 <- corr.postslope.tau * postslope.truesd * tau.truesd
covmatrix <- matrix(c(var1, cov12, cov13,
cov12, var2, cov23,
cov13, cov23, var3),
nrow = 3, ncol = 3)
cormatrix <- cov2cor(covmatrix)
simparms <- mvrnorm(n = Npat, mu = mu, Sigma = covmatrix)
# Short data (true random effect parameters: intercept, slopes and knot point)
dat2 <- data.frame(id = id.short, simparms)
# Merge long and short data after that order data by id
simdat <- merge(dat1, dat2, by = "id")
#------------------------------------------------------------------------#
# ----------Simulating data for MLTAR(2,1,1) model-----------------------#
#------------------------------------------------------------------------#
y <- vector("numeric", nrow(simdat))
y[1]<- error[1] # assigning first value for y
for(i in 2:nrow(simdat)){
df <- simdat[i,]
## Generate data y recursively from time 2 to observation
if (y[(i-1)] <= simparms[df$id, 3]) ## Determine the location of the lagged y
## Simulate data from Regime 1
{y[i]=sum(simparms[df$id, 3],simparms[df$id, 1]*(y[i-1]-simparms[df$id, 3]),
df$error)}
else
## Simulate data from Regime 2
{y[i]=sum(simparms[df$id, 3],simparms[df$id, 2]*(y[i-1]-simparms[df$id, 3]),
df$error)}
}
simdat$y<-(y)
# Return data for Stan
return(list(
trueparms = c(
"beta[1]" = preslope.truemean,
"beta[2]" = postslope.truemean,
"betakp" = tau.truemean,
"u_sigma[1,2]" = corr.preslope.postslope * preslope.truesd * postslope.truesd,
"u_sigma[1,3]" = corr.preslope.tau * preslope.truesd * tau.truesd,
"u_sigma[2,3]" = corr.postslope.tau * postslope.truesd * tau.truesd,
"y_sd" = error.truesd),
standata = list(
"N" = nrow(simdat),
"Npat" = Npat,
"fixedkp" = tau.truemean,
"zeros3" = rep(0,3),
"betatau_lower" = betatau.lowerbound,
"betatau_upper" = betatau.upperbound,
"id" = simdat$id,
"y" = simdat$y)
))
}
#==============================================================
# Create simulated data, and fit the MLTAR(2,1,1) model
#==============================================================
# Create a single simulated dataset with specified seed
standata <- createdata_mltar(seed = 12345678)
# Specify the location of the Stan model code file
# Notes: The stancode.filepath object should be changed, so that it contains
# the pathway to the Stan model code, wherever that file is installed
# on your PC.
m <- stan_model('Stan_Code_MLTAR_MLAR.stan')
stanmonitor <- c("beta",
"betakp",
"y_sd",
"u_sigma"
)
# Fit the MLTAR(2,1,1) model to the simulated data
stanfit <- sampling(m, data = standata$standata,
pars = stanmonitor,
chains = 2,
cores = 1,
iter = 10000,
warmup = 300,
thin = 200,
init_r = 0.2,
control = list(adapt_delta = 0.999, max_treedepth = 12),
seed = 978245244)
#------------------stan code------------------#
data {
int<lower=1> N; // number of observations
int<lower=1> Npat; // number of individuals
real<lower=0> y[N]; // outcome data
int<lower=1,upper=Npat> id[N]; // patient id for each observation
vector<lower=0,upper=0>[3] zeros3; // mean vector for random effects dist
int<lower=1> betatau_lower; // lower bound for (prior on) mean threshold point
int<lower=1> betatau_upper; // upper bound for (prior on) mean thershold point
}
parameters {
vector[2] beta; // fixed effects, intercept and slopes
real<lower=betatau_lower,upper=betatau_upper> betakp; // fixed effects, knotpoint
real<lower=0> y_sd; // level 1 error sd
matrix[3,Npat] u;
cov_matrix[3] u_sigma;
}
transformed parameters {
vector[3] alpha[Npat]; // random effects
real y_mu[N]; // mean parameter based on regression equation
//==========================
// calculate random effects
//==========================
//for (i in 1:Npat) {
//for (k in 1:2) alpha[i,k] <- beta[k] + u[id[i],k];
//alpha[i,3] <- betakp + u[id[i],3];
//}
for (i in 1:Npat) {
for (k in 1:2) alpha[i,k] <- beta[k] + u[id[i],k];
alpha[i,3] <- betakp + u[id[i],3];
}
//=====================
// regression equation
//=====================
for (j in 2:N) {
y_mu[1] <- 10 ;
if (y_mu[(j-1)] <= alpha[id[j], 3])
//if (age[j] < alpha[id[j],4])
y_mu[j] <- alpha[id[j],3] + alpha[id[j],1]*(y_mu[(j-1)] - alpha[id[j],3]);
else
y_mu[j] <- alpha[id[j],3] + alpha[id[j],2]*(y[(j-1)] - alpha[id[j],3]);
}
}
//========
// priors
//========
model {
for (i in 1:Npat) {
col(u,i) ~ multi_normal(zeros3,u_sigma);
}
matrix[3,3] rateForWish;
rateForWish[3,3] <- 1;
for (s in 1:2) {
rateForWish[s,s] <- 1;
for (z in (s+1):3){
rateForWish[s,z]<- 0; rateForWish[z,s] <- 0;
}
}
beta[1] ~ normal(0, 0.000001); // prior: fixed effect, intercept
beta[2] ~ normal(0, 0.000001); // prior: fixed effect, slope before knot
u_sigma ~ inv_wishart(3,rateForWish);
betakp ~ normal(0,0.000001);
y_sd ~ gamma(0.001,0.001); // prior: level 1 error sd
//==================
// model likelihood
//==================
y ~ normal(y_mu, y_sd); // likelihood for the observed data
}
// end of the program