Hi!
I’m trying to do hierarchical survival modeling using the Weibull model. I can fit both the unpooled, pooled and hierarchical models fine but I’m baffled by the posterior produced by the latter one.
I’m using the lung data set from the R package survival with age and ph.karno as covariates. In the hierarchical model, I fit separate effects for ph.karno.
The pooled model produces a concentrated (sd = 0.06) posterior for the ph.karno effect, with the 95% CI not containing 0. In the hierarchical model the sex-specific effects have sd > 2 and the population distribution’s sd > 3. Fitting separate models for both sexes produces posteriors similar to the pooled model.
Here’s a figure with the effect posteriors for the pooled (black) and the hierarchical models.
To me the hierarchical model’s posterior doesn’t make sense, as I would expect it to be inline with the unpooled/pooled models. Could you share some insight why this happens?
Below is the code I used. I also fitted the models with brms which resulted in similar posteriors.
library(rstan)
library(survival)
library(tidyverse)
library(magrittr)
## Data **************************
my_data <- lung
my_data <- my_data[, c("age", "ph.karno", "status", "time", "sex")]
my_data <- my_data %>% drop_na
# Standardize
my_data[, c("age", "ph.karno")] <- apply(my_data[, c("age", "ph.karno")],
MARGIN = 2,
FUN = function(X) {(X-mean(X))/sd(X)})
## Pooled ************************
weibull_model <- stan_model("weibull.stan")
stan_data <- list(N = nrow(my_data),
Y = my_data$time,
cens = 2 - my_data$status,
K = 2,
X = my_data[, c("ph.karno", "age")],
N_censored = sum(my_data$status==1),
censored = which(my_data$status==1),
uncensored = which(my_data$status==2))
weibull_fit <- sampling(weibull_model,
stan_data,
iter = 2000, chains = 1)
## Hierarchical ******************
weibull_hier_model <- stan_model("weibull_hierarchical.stan")
stan_data_hier <- list(N = nrow(my_data),
Y = my_data$time,
cens = 2 - my_data$status,
K = 2,
X = my_data[, c("ph.karno", "age")],
N_censored = sum(my_data$status==1),
censored = which(my_data$status==1),
uncensored = which(my_data$status==2),
n_groups = length(unique(my_data$sex)),
group_ind = my_data$sex)
weibull_hier_fit <- sampling(weibull_hier_model,
stan_data_hier,
iter = 4000, chains = 1,
control = list(max_treedepth = 12))
## Results ***********************
# Effects
b1 <- data.frame(value = rstan::extract(weibull_fit, "b")[[1]][, 1],
model = "pooled")
bh <- rstan::extract(weibull_hier_fit, "bh")[[1]] %>%
data.frame %>%
set_colnames(unique(my_data$sex)) %>%
gather(key = "sex", value = value)
# Plot marginal posteriors
p_marginal <- ggplot() +
geom_density(data = bh,
aes(x = value, fill = sex),
alpha = 0.25
) +
geom_density(
data = b1,
aes(x = value))
p_marginal
The Stan models are modified from the code brms generates.
Pooled model:
// weibull.stan
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
array[N] int<lower=-1,upper=2> cens; // indicates censoring
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int N_censored;
int censored[N_censored]; // indices of censored subjects
int uncensored[N-N_censored];
}
parameters {
vector[K] b; // regression coefficients
real Intercept; // temporary intercept for centered predictors
real<lower=0> shape; // shape parameter
}
model {
// linear predictor
vector[N] mu = rep_vector(0.0, N);
mu += Intercept + X * b;
mu = exp(mu);
Y[uncensored] ~ weibull(shape, mu[uncensored] / tgamma(1 + 1 / shape));
target += weibull_lccdf(Y[censored] | shape, mu[censored] / tgamma(1 + 1 / shape));
// Priors
b ~ normal(0, 1);
Intercept ~ normal(0, 1);
shape ~ gamma(0.01, 0.01);
}
Hierarchical model:
// weibull_hierarchical.stan
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
array[N] int<lower=0,upper=1> cens; // indicates censoring
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> n_groups;
int group_ind[N];
int N_censored;
int censored[N_censored]; // indices of censored subjects
int uncensored[N-N_censored];
}
parameters {
vector[K-1] b; // regression coefficients
vector[n_groups] bh; // regression coefficients with hierarchy
real Intercept; // temporary intercept for centered predictors
real<lower=0> shape; // shape parameter
// Hyperparameters
real bh_mu;
real<lower=0> bh_sigma;
}
transformed parameters {
// Contribution of bh to likelihood
real bh_contr = 0;
for(i in 1:N) {
bh_contr += X[i, 1] * bh[group_ind[i]];
}
}
model {
// linear predictor
vector[N] mu = rep_vector(0.0, N);
mu += Intercept + X[, 2:K] * b;
mu += bh_contr;
mu = exp(mu);
Y[uncensored] ~ weibull(shape, mu[uncensored] / tgamma(1 + 1 / shape));
target += weibull_lccdf(Y[censored] | shape, mu[censored] / tgamma(1 + 1 / shape));
// Priors
Intercept ~ normal(0, 1);
shape ~ gamma(0.01, 0.01);
b ~ normal(0, 1);
// Hierarchy for bh
bh ~ normal(bh_mu, bh_sigma);
bh_mu ~ normal(0, 1);
bh_sigma ~ gamma(2, 1);
}
generated quantities {
real bh_population;
bh_population = normal_rng(bh_mu, bh_sigma);
}