I am still trying to get my MSM-TVTP model to work properly (previous Stan and R code found here, but the current question is maybe of general interest) and just looked at the posterior correlation of the parameters. Focusing only on gamma (intercept of transition probabilities), alpha (intercept of linear model), and phi (AR1 parameter), I get the correlation matrix below. I am wondering both whether (and to which degree) this is affecting a) the efficiency of the sampling and b) the model’s ability to recover the parameters. Especially the correlation between alpha1 and phi1 worries me. The most recent set of simulations showed that the parameters are being “approximately” recovered but seem to get stuck in the same “wrong” area.
Do you have any suggestions to reduce this dependency? Reparameterizing the model with its unconditional mean seems to conflict with the ordering on the intercepts to prevent label switching.
gamma[1] gamma[2] alpha[1] alpha[2] phi[1] phi[2]
gamma[1] 1.00 0.221 0.25 0.33 -0.267 -0.179
gamma[2] 0.22 1.000 -0.14 0.13 0.072 0.052
alpha[1] 0.25 -0.140 1.00 -0.36 -0.939 0.502
alpha[2] 0.33 0.127 -0.36 1.00 0.299 -0.812
phi[1] -0.27 0.072 -0.94 0.30 1.000 -0.569
phi[2] -0.18 0.052 0.50 -0.81 -0.569 1.000
R and Stan code for reference again
#####################################
### MC simulation ###
### for MSM-TVTP ###
#####################################
## setup
{
## libraries and options
library(rstan)
#options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
library(shinystan)
#################################################################
## seed
set.seed(12345)
}
## simulation
dgp <- function(N, T, M){
## covariates
z <- array(rnorm(N * M * T, 0, 1), dim = c(N, T, M))
# state matrix
s <- matrix(NA, nrow = N, ncol = T) # a T by N matrix indicating the state the individual i is in at point t in cell [t, i]
# transition probabilities
p <- array(NA, dim = c(2, 2, T, N))
for (i in 1:N){
for (t in 1:T){
s_star <- gamma
#+ t(z[i, t, ]) %*% lambda # [t] because the TP are lagged when determining y
# s_star <- gamma[1] + t(z[i, t, ]) %*% lambda[, 1]
p_s <- matrix(NA, ncol = 2, nrow = 2)
diag(p_s) <- pnorm(s_star) # variance of 1
p_s[1, 2] <- 1 - p_s[1, 1]
p_s[2, 1] <- 1 - p_s[2, 2]
# p_11 + p_12 = 1 and p_21 + p_22 = 1
p[ , , t, i] <- p_s # takes into consideration that lambda should have the same effect
# in each state i.e. increasing the probability of staying
}
# states
s[i, 1] <- rbinom(1, 1, (1 - p[2, 2, 1, i]) / (2 - p[2, 2, 1, i] - p[1, 1, 1, i])) + 1
# s[i, 1] <- rbinom(1, 1, 0.5)
for (t in 2:T){
if (s[i, t - 1] == 1){
s[i, t] <- rbinom(n = 1, size = 1,prob = p[1, 2, t - 1, i]) + 1
} else if (s[i, t - 1] == 2){
s[i, t] <- rbinom(n = 1, size = 1,prob = p[2, 2, t - 1, i]) + 1
}
}
}
# outcome
# individual time series
y.pre <- array(NA, dim = c(N, T, 2))
for (i in 1:N){
y.pre[i, 1,] <- rnorm(2, alpha, sigma)
for (t in 2:T){
y.pre[i, t,] <- rnorm(2, alpha + phi * y.pre[i, (t - 1),], sigma)
}
}
# switching time series
y <- matrix(NA, nrow = N, ncol = T)
for (i in 1:N){
for (t in 1:T){
y[i, t] <- y.pre[i ,t , s[i, t]]
}
}
## set up data for stan
data_list <- list(T = T,
N = N,
M = M,
y = y,
z = z)
## out
out <- list(data = data_list)
return(out)
}
####################################################################################################
####################################################################################################
## Sourcing current directory
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
## parameters
M <- 2
alpha <- c(2, 6)
gamma <- c(0.5, 0.5) # intercept in TPs
#lambda <- matrix(c(-0.5, 0.3, 0.9, -0.2), byrow = T, ncol = 2, nrow = M)
phi <- c(0.8, 0.2)
sigma <- 1
n_parameters <- length(alpha) + length(gamma) + (length(phi) * 2) + length(sigma)
# + length(lambda)
## Start time
orig_time <- Sys.time()
file <- file("progress.txt")
## Global parameters
n_chains <- 2L # number of chains
n_iter <- 2000L # number of iterations (thinning half)
n_warmup <- 1000L # number of warmup
## simulations over
T.seq <- c(300)
N.seq <- c(1)
N.sim <- seq(1, 4)
total.sim <- length(T.seq) * length(N.seq) * length(N.sim)
## container
out_par <- matrix(ncol = 9, nrow = 0)
colnames(out_par) <- c("N", "T", "sim", "name", "mean", "sd", "q025", "q50", "q975")
data.fit.list <- list()
## Simulation
sim_counter <- 0
for (t1 in T.seq){
for (n1 in N.seq){
out <- dgp(n1, t1, M)
for (i_sim in N.sim){
sim_counter <- sim_counter + 1
fit <- stan("05_msm_simulation.stan",
data = out[["data"]],
chains = n_chains,
iter = n_iter,
warmup = n_warmup,
init_r = 0.5,
control = list(adapt_delta = .99))
## output
fit.ext <- as.matrix(fit)
for (ii in 1:n_parameters){
i_mean <- mean(fit.ext[, ii])
i_sd <- sd(fit.ext[, ii])
i_quan <- quantile(fit.ext[, ii], probs = c(0.025, 0.50, 0.975), names = F)
out_par <- rbind(out_par, c(n1, t1, sim_counter, colnames(fit.ext)[ii], i_mean, i_sd, i_quan ))
}
data.fit.list[[sim_counter]] <- list(fit.mat = fit,
data = out[["data"]])
progress <- paste("I've done", sim_counter, "of", total.sim, "simulations so far.", "The last size of N was", n1, ". The last size of T was",
t1 ,"The current date and time are", as.character(Sys.time()), "and I started at", as.character(orig_time))
writeLines(progress, file)
}
}
}
## clean output
out_par <- as.data.frame(out_par)
i_seq <- c(1, 2, 3, 5, 6, 7, 8, 9)
for (i in i_seq){
out_par[, i] <- as.numeric(as.character(out_par[, i]))
}
## save output
save(out_par, file = "04_msm_simulation.RData")
save(data.fit.list, file = "02_msm_simulation.RData")
functions {
vector vlnormal_pdf(vector y, vector x, real sigma){
return log(1/sqrt(2 * pi() * sigma) * exp(- ((y - x).*(y - x) ) ./ (2 * sigma) ));
}
}
data {
int<lower = 0> T; // number of observed time periods
int<lower = 0> N; // number of observed units
matrix[N, T] y; // outcome
int<lower = 0> M;
row_vector[M] z[N, T]; // exogenous random variable affecting transition probabilities
}
parameters {
real gamma[2];
ordered[2] alpha;
real<lower = 0, upper = 1> phi[2];
real<lower = 0> sigma;
}
transformed parameters {
real fy[N, T];
for (i in 1:N) {
vector[T] s[2];
vector[2] p[T, 2];
real p_s1[T, 2];
// t1
for (t in 1:T) {
// transition probabilities
p[t, 1, 1] = normal_cdf(gamma[1], 0, 1);
p[t, 2, 2] = normal_cdf(gamma[2], 0, 1);
p[t, 2, 1] = 1 - p[t, 2, 2];
}
for (j in 1:2){
s[j, 1] = normal_lpdf(y[i, 1] | alpha[j], sigma);
s[j, 2:T] = vlnormal_pdf(to_vector(y[i, 2:T]), to_vector(alpha[j] + phi[j] * y[i, 1:(T - 1)]), sigma);
}
// t = 1
// Initial probability
p_s1[1, 2] = (1 - p[1, 2, 2]) / (2 - p[1, 1, 1] - p[1, 2, 2]);
p_s1[1, 1] = p[1, 1, 1] * p_s1[1, 2] +
p[1, 2, 1] * (1 - p_s1[1, 2]);
fy[i,1] = log_mix(p_s1[1, 1], s[1, 1], s[2, 1]);
p_s1[1, 2] = exp(s[1, 1] + log(p_s1[1,1]) - fy[i,1]);
// t = 2:T
for (t in 2:T){
p_s1[t, 1] = p[(t - 1), 1, 1] * p_s1[(t - 1), 2]
+ p[(t - 1), 2, 1] * (1 - p_s1[(t - 1), 2]);
fy[i,t] = log_mix(p_s1[t, 1], s[1, t], s[2, t]);
p_s1[t, 2] = exp(s[1, t] + log(p_s1[t, 1]) - fy[i,t]);
}
}
}
model {
// likelihood
target += fy;
// priors
alpha ~ normal(0, 4);
phi ~ normal(0.5, 0.25);
gamma ~ normal(0, 1);
}