Hello Everyone,
I’m trying to do a simulation study of survival-longitudinal joint modeling. Currently working on the complete-case situation (i.e., no censoring). Since this is a simulation, I know the true parameters. It seems that the estimated coefficients of the covariates (X) in both the survival sub-model and longitudinal sub-model are quite off. Others are acceptable. I wonder if there’s any specific reason for it.
I would really appreciate any suggestions that can speed up the computation. Thanks a lot!
Best,
Li
Model
Survival Sub-Model
Longitudinal Sub-Model
Data Simulation
########################## true parameters ##########################
#### Scenario A: biomarker variability not associated with survival
m = 300 # number of patients
p = 3 # number of covariates
n = sample(x = 10:50, size = m, replace = T) # number of check-ups
### Survival
rho = 2
alpha_0 = -1.5
alphas = c(0.5, -0.75, 1.0)
gamma_1 = 0.2
gamma_2 = 1
gamma_3 = 0
### Longitudinal
beta_0 = 24
betas = c(-0.5, 2, 1)
tau = 0
sigma_I = 2
sigma_S = 0.5
mu_V = 1.5
sigma_V = 0.25
#### Scenario B: biomarker variability associated with survival
gamma_3 = 0.5
#####################################################################
# Simulation
set.seed(123)
X = matrix(nrow = m, ncol = p)
Y = list()
e = list()
I = numeric(m)
S = numeric(m)
V = numeric(m)
lambda = numeric(m)
T_surv = numeric(m)
T_cens = numeric(m)
T_obs = numeric(m)
status = numeric(m)
t = list()
T_obs = numeric(m)
for (i in 1:m) {
X[i, ] <- rnorm(p, mean = 0, sd = 1)
I[i] <- rnorm(1, mean = 0, sd = sigma_I)
S[i] <- rnorm(1, mean = 0, sd = sigma_S)
V[i] <- rlnorm(1, meanlog = mu_V, sdlog = sigma_V)
# survival sub-model
lambda[i] <- exp(alpha_0 + sum(alphas * X[i]) + gamma_1 * I[i] + gamma_2 * S[i] + gamma_3 * log(V[i]))
T_surv[i] <- rweibull(1, rho, (lambda[i] ^ (- 1 / rho)))
T_cens[i] <- runif(1, min = 0, max = 12)
T_cens[i] <- Inf # no censor
T_obs[i] <- min(T_cens[i], T_surv[i])
status[i] <- as.numeric(T_surv[i] <= T_cens[i])
# longitudinal sub-model
t[[i]] <- sort(runif(n[i])) * T_obs[i]
e[[i]] <- rnorm(n[i], 0, V[i])
Y[[i]] <- beta_0 + I[i] + sum(betas * X[i]) + (tau + S[i]) * t[[i]] + e[[i]]
}
#####################################################################
# Model Fitting
sm <- stan_model("joint_model_cc.stan")
stan_data <- list(
m = m,
p = p,
n = n,
N = sum(n),
X = X,
y = unlist(Y),
t = unlist(t),
T_obs = T_obs
)
fit <- sampling(sm, data = stan_data, seed = 42, chains = 8, cores = 4, iter = 10000)
data {
int<lower=1> m; // number of patients
int<lower=0> p; // number of covariates
int n[m]; // array of observations per patient
int<lower=1> N; // number of observations
matrix[m, p] X; // covariates matrix
vector[N] y; // flattend longitudinal outcome
vector[N] t; // flattend check-up points
vector[m] T_obs; // survival time
}
parameters {
// Survival Sub-Model
real<lower=0> rho; // suvival sub-model Weibull distribution shape
real alpha_0; // survival sub-model intercept
vector[p] alphas; // survival sub-model covariates coefficients
real gamma_1; // survival sub-model random intercept coefficient
real gamma_2; // survival sub-model random slope coefficient
real gamma_3; // survival sub-model random variability coefficient
// Longitudinal Sub-Model
real beta_0; // longitudinal sub-model intercept
vector[p] betas; // longitudinal sub-model covariates coefficients
real tau; // longitudinal fixed time coefficient
real<lower=0> sigma_I; // random intercept standard deviation
real<lower=0> sigma_S; // random slope standard deviation
real mu_V; // random variability log-normal mean
real<lower=0> sigma_V; // random variability log-normal standard deviation
vector[m] I; // random intercept
vector[m] S; // random slope
vector<lower=0>[m] V; // random variability
}
model{
int pos = 0;
vector[m] lambda; // survival sub-model Weibull distribution scale
vector[N] mu_y; // longitudinal sub-model subject-level mean
// priors
rho ~ gamma(1, 1);
alpha_0 ~ normal(0, 1);
alphas ~ normal(0, 1);
gamma_1 ~ normal(0, 1);
gamma_2 ~ normal(0, 1);
gamma_3 ~ normal(0, 1);
beta_0 ~ normal(24, 1);
betas ~ normal(0, 1);
tau ~ normal(0, 1);
sigma_I ~ lognormal(0, 1);
sigma_S ~ lognormal(0, 1);
mu_V ~ normal(0, 1);
sigma_V ~ lognormal(0, 1);
// likelihood
for (i in 1:m) {
I[i] ~ normal(0, sigma_I);
S[i] ~ normal(0, sigma_S);
V[i] ~ lognormal(mu_V, sigma_V);
// survival
lambda[i] = exp(alpha_0 + dot_product(alphas, X[i, ]) + gamma_1 * I[i] + gamma_2 * S[i] + gamma_3 * log(V[i]));
T_obs[i] ~ weibull(rho, (lambda[i] ^ (- 1 / rho)));
// longitudinal
for (j in 1:n[i]) {
pos += 1;
mu_y[pos] = beta_0 + I[i] + dot_product(betas, X[i, ]) + (tau + S[i]) * t[pos];
y[pos] ~ normal(mu_y[pos], V[i]);
}
}
}
Summary Table
true_vals mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
rho 2.00 1.97 0.00 0.13 1.74 1.88 1.95 2.04 2.26 4548.22 1.00
alpha_0 -1.50 -1.19 0.00 0.39 -1.95 -1.45 -1.19 -0.92 -0.44 19487.15 1.00
alphas[1] 0.50 0.86 0.00 0.09 0.68 0.79 0.85 0.91 1.05 8129.91 1.00
alphas[2] -0.75 0.01 0.00 0.07 -0.12 -0.04 0.01 0.05 0.15 18144.00 1.00
alphas[3] 1.00 -0.03 0.00 0.08 -0.18 -0.08 -0.03 0.02 0.12 20695.98 1.00
gamma_1 0.20 0.24 0.00 0.04 0.16 0.22 0.24 0.27 0.32 25617.94 1.00
gamma_2 1.00 0.85 0.01 0.38 0.18 0.62 0.84 1.07 1.62 2916.70 1.00
gamma_3 0.50 0.30 0.00 0.25 -0.20 0.13 0.30 0.47 0.79 20114.21 1.00
beta_0 24.00 23.97 0.00 0.14 23.68 23.87 23.97 24.06 24.25 6523.86 1.00
betas[1] -0.50 2.67 0.00 0.13 2.42 2.58 2.67 2.76 2.92 7571.03 1.00
betas[2] 2.00 -0.14 0.00 0.13 -0.40 -0.23 -0.15 -0.06 0.11 8278.45 1.00
betas[3] 1.00 0.02 0.00 0.14 -0.26 -0.07 0.02 0.12 0.30 6892.24 1.00
tau 0.00 -0.05 0.00 0.13 -0.30 -0.14 -0.06 0.03 0.21 1783.79 1.01
sigma_I 2.00 1.99 0.00 0.10 1.80 1.92 1.99 2.06 2.20 28977.09 1.00
sigma_S 0.50 0.49 0.01 0.12 0.24 0.41 0.49 0.57 0.72 307.64 1.03
mu_V 1.50 1.50 0.00 0.02 1.46 1.49 1.50 1.51 1.53 37618.18 1.00
sigma_V 0.25 0.27 0.00 0.01 0.24 0.26 0.27 0.28 0.30 27609.39 1.00