Hi Stan forum
This is my first time posting on here and it seems a wonderful and friendly resource! Sorry for the verbose post I opted for full code reproducibility above brevity in places!
I am fitting a proportional-hazards model using the survival branch from rstanarm. I model survival of larvae that have been inoculated with bacteria to test for differences in virulence between treatments. My treatments have multiple levels and I want to know how to do pairwise contrasts of hazard ratios. Below is a full reproducible example produced using the R package reprex
.
My key questions are:
- How would you check for “significant” differences in survival between levels of a covariate?
- How do you calculate hazard ratios between any two levels of a covariate? See section calculate hazard ratios for code implementing this.
- Can you average over predictions of each random effect within each fixed effect to get overall predictions for each fixed effect level? See section visualise survival curve for code implementing this.
# load packages
library(rstanarm)
library(tidyverse)
library(tidybayes)
library(bayesplot)
# load in data
d <- data.frame(
stringsAsFactors = FALSE,
sample = c("T22","T22","T22","T22",
"T22","T22","T22","T22","T22","T22","V22","V22",
"V22","V22","V22","V22","V22","V22","V22","V22","L22",
"L22","L22","L22","L22","L22","L22","L22","L22",
"L22","T23","T23","T23","T23","T23","T23","T23",
"T23","T23","T23","V23","V23","V23","V23","V23",
"V23","V23","V23","V23","V23","L23","L23","L23",
"L23","L23","L23","L23","L23","L23","L23","T24",
"T24","T24","T24","T24","T24","T24","T24","T24",
"T24","V24","V24","V24","V24","V24","V24","V24","V24",
"V24","V24","L24","L24","L24","L24","L24","L24",
"L24","L24","L24","L24","T25","T25","T25","T25",
"T25","T25","T25","T25","T25","T25","V25","V25",
"V25","V25","V25","V25","V25","V25","V25","V25",
"L25","L25","L25","L25","L25","L25","L25","L25",
"L25","L25","T26","T26","T26","T26","T26","T26",
"T26","T26","T26","T26","V26","V26","V26","V26","V26",
"V26","V26","V26","V26","V26","L26","L26","L26",
"L26","L26","L26","L26","L26","L26","L26","T27",
"T27","T27","T27","T27","T27","T27","T27","T27",
"T27","V27","V27","V27","V27","V27","V27","V27",
"V27","V27","V27","L27","L27","L27","L27","L27",
"L27","L27","L27","L27","L27","T28","T28","T28",
"T28","T28","T28","T28","T28","T28","T28","V28","V28",
"V28","V28","V28","V28","V28","V28","V28","V28",
"L28","L28","L28","L28","L28","L28","L28","L28",
"L28","L28"),
plastic = c("T","T","T","T","T","T",
"T","T","T","T","V","V","V","V","V","V","V","V",
"V","V","L","L","L","L","L","L","L","L","L",
"L","T","T","T","T","T","T","T","T","T","T",
"V","V","V","V","V","V","V","V","V","V","L","L",
"L","L","L","L","L","L","L","L","T","T","T",
"T","T","T","T","T","T","T","V","V","V","V","V",
"V","V","V","V","V","L","L","L","L","L","L",
"L","L","L","L","T","T","T","T","T","T","T",
"T","T","T","V","V","V","V","V","V","V","V","V",
"V","L","L","L","L","L","L","L","L","L","L",
"T","T","T","T","T","T","T","T","T","T","V","V",
"V","V","V","V","V","V","V","V","L","L","L",
"L","L","L","L","L","L","L","T","T","T","T",
"T","T","T","T","T","T","V","V","V","V","V","V",
"V","V","V","V","L","L","L","L","L","L","L",
"L","L","L","T","T","T","T","T","T","T","T","T",
"T","V","V","V","V","V","V","V","V","V","V",
"L","L","L","L","L","L","L","L","L","L"),
time = c(24L,24L,24L,24L,24L,24L,
24L,48L,48L,48L,24L,24L,24L,24L,24L,24L,24L,24L,
24L,48L,24L,24L,24L,48L,48L,48L,48L,48L,48L,
48L,24L,24L,24L,24L,24L,24L,24L,24L,24L,24L,
24L,24L,24L,24L,24L,24L,24L,24L,24L,48L,24L,24L,
24L,24L,24L,24L,24L,24L,48L,48L,24L,48L,48L,
48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,
48L,48L,48L,48L,48L,24L,24L,24L,24L,24L,48L,
48L,48L,48L,48L,24L,24L,24L,24L,24L,24L,24L,
24L,24L,48L,24L,24L,24L,24L,48L,48L,48L,48L,48L,
48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,48L,
24L,24L,24L,24L,24L,24L,24L,24L,48L,48L,24L,24L,
24L,24L,24L,24L,24L,24L,24L,24L,48L,48L,48L,
48L,48L,48L,48L,48L,48L,48L,24L,24L,24L,24L,
24L,24L,24L,48L,48L,48L,24L,24L,24L,24L,24L,24L,
24L,24L,24L,24L,24L,24L,48L,48L,48L,48L,48L,
48L,48L,48L,24L,24L,24L,24L,24L,24L,24L,24L,24L,
48L,24L,24L,24L,24L,24L,24L,24L,24L,24L,24L,
24L,24L,24L,48L,48L,48L,48L,48L,48L,48L),
status = c(1L,1L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,
1L,1L,0L,0L,0L,0L,0L,0L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,0L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,0L,1L,1L,0L,0L,0L,
0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,
0L,1L,1L,1L,1L,1L,1L,0L,0L,0L,0L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,
0L,0L,1L,1L,0L,0L,0L,0L,0L,0L,0L,0L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,0L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,0L,0L,0L,0L,0L,0L,0L,0L,0L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,0L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,0L,0L,0L,0L,0L,0L,
0L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,0L,0L,0L,
0L,0L,0L)
)
# run a model where we look for differences in survival between plastics
mod <- stan_surv(
Surv(time, status) ~ plastic + (1 | sample),
data = d,
chains = 3,
cores = 3,
seed = 42,
iter = 3000
)
mod
#> stan_surv
#> baseline hazard: M-splines on hazard scale
#> formula: Surv(time, status) ~ plastic + (1 | sample)
#> observations: 210
#> events: 146 (69.5%)
#> right censored: 64 (30.5%)
#> delayed entry: no
#> ------
#> Median MAD_SD exp(Median)
#> (Intercept) -0.7 0.4 NA
#> plasticT 1.4 0.6 4.1
#> plasticV 1.4 0.6 4.2
#> m-splines-coef1 0.0 0.0 NA
#> m-splines-coef2 0.0 0.0 NA
#> m-splines-coef3 0.0 0.0 NA
#> m-splines-coef4 0.7 0.1 NA
#> m-splines-coef5 0.0 0.0 NA
#> m-splines-coef6 0.3 0.1 NA
#>
#> Error terms:
#> Groups Name Std.Dev.
#> sample (Intercept) 1.1
#> Num. levels: sample 21
#>
#> ------
#> * For help interpreting the printed output see ?print.stanreg
#> * For info on the priors used see ?prior_summary.stanreg
summary(mod)
#>
#> Model Info:
#>
#> function: stan_surv
#> baseline hazard: M-splines on hazard scale
#> formula: Surv(time, status) ~ plastic + (1 | sample)
#> algorithm: sampling
#> sample: 4500 (posterior sample size)
#> priors: see help('prior_summary')
#> observations: 210
#> events: 146 (69.5%)
#> right censored: 64 (30.5%)
#> delayed entry: no
#>
#> Estimates:
#> mean sd 10% 50% 90%
#> (Intercept) -0.6 0.4 -1.2 -0.7 -0.1
#> plasticT 1.4 0.6 0.6 1.4 2.1
#> plasticV 1.4 0.6 0.7 1.4 2.2
#> m-splines-coef1 0.0 0.0 0.0 0.0 0.0
#> m-splines-coef2 0.0 0.0 0.0 0.0 0.0
#> m-splines-coef3 0.0 0.0 0.0 0.0 0.0
#> m-splines-coef4 0.7 0.1 0.6 0.7 0.7
#> m-splines-coef5 0.0 0.0 0.0 0.0 0.0
#> m-splines-coef6 0.3 0.1 0.2 0.3 0.4
#> b[(Intercept) sample:L22] -0.1 0.6 -0.8 -0.1 0.7
#> b[(Intercept) sample:L23] 1.4 0.5 0.7 1.4 2.1
#> b[(Intercept) sample:L24] 0.5 0.6 -0.2 0.5 1.2
#> b[(Intercept) sample:L25] -0.8 0.6 -1.6 -0.8 0.0
#> b[(Intercept) sample:L26] -1.2 0.7 -2.1 -1.1 -0.3
#> b[(Intercept) sample:L27] -0.4 0.6 -1.2 -0.4 0.4
#> b[(Intercept) sample:L28] -0.1 0.6 -0.9 -0.1 0.7
#> b[(Intercept) sample:T22] 0.1 0.5 -0.6 0.1 0.7
#> b[(Intercept) sample:T23] 0.9 0.5 0.2 0.8 1.5
#> b[(Intercept) sample:T24] -1.8 0.6 -2.6 -1.8 -1.0
#> b[(Intercept) sample:T25] 0.5 0.5 -0.1 0.5 1.2
#> b[(Intercept) sample:T26] 0.2 0.5 -0.5 0.2 0.8
#> b[(Intercept) sample:T27] 0.0 0.5 -0.7 0.0 0.6
#> b[(Intercept) sample:T28] 0.5 0.5 -0.1 0.5 1.2
#> b[(Intercept) sample:V22] 0.5 0.5 -0.2 0.5 1.1
#> b[(Intercept) sample:V23] 0.4 0.5 -0.3 0.4 1.0
#> b[(Intercept) sample:V24] -2.5 0.8 -3.5 -2.5 -1.6
#> b[(Intercept) sample:V25] -0.6 0.5 -1.3 -0.6 0.0
#> b[(Intercept) sample:V26] 0.8 0.5 0.1 0.8 1.5
#> b[(Intercept) sample:V27] 0.8 0.5 0.1 0.8 1.5
#> b[(Intercept) sample:V28] 0.8 0.5 0.2 0.8 1.5
#> Sigma[sample:(Intercept),(Intercept)] 1.3 0.6 0.7 1.2 2.0
#>
#> MCMC diagnostics
#> mcse Rhat n_eff
#> (Intercept) 0.0 1.0 1521
#> plasticT 0.0 1.0 1401
#> plasticV 0.0 1.0 1531
#> m-splines-coef1 0.0 1.0 6213
#> m-splines-coef2 0.0 1.0 7303
#> m-splines-coef3 0.0 1.0 6066
#> m-splines-coef4 0.0 1.0 6160
#> m-splines-coef5 0.0 1.0 5926
#> m-splines-coef6 0.0 1.0 6817
#> b[(Intercept) sample:L22] 0.0 1.0 2357
#> b[(Intercept) sample:L23] 0.0 1.0 2126
#> b[(Intercept) sample:L24] 0.0 1.0 2312
#> b[(Intercept) sample:L25] 0.0 1.0 3121
#> b[(Intercept) sample:L26] 0.0 1.0 2501
#> b[(Intercept) sample:L27] 0.0 1.0 2730
#> b[(Intercept) sample:L28] 0.0 1.0 2386
#> b[(Intercept) sample:T22] 0.0 1.0 1615
#> b[(Intercept) sample:T23] 0.0 1.0 1535
#> b[(Intercept) sample:T24] 0.0 1.0 2333
#> b[(Intercept) sample:T25] 0.0 1.0 1551
#> b[(Intercept) sample:T26] 0.0 1.0 1530
#> b[(Intercept) sample:T27] 0.0 1.0 1705
#> b[(Intercept) sample:T28] 0.0 1.0 1534
#> b[(Intercept) sample:V22] 0.0 1.0 1939
#> b[(Intercept) sample:V23] 0.0 1.0 2131
#> b[(Intercept) sample:V24] 0.0 1.0 2880
#> b[(Intercept) sample:V25] 0.0 1.0 2156
#> b[(Intercept) sample:V26] 0.0 1.0 1955
#> b[(Intercept) sample:V27] 0.0 1.0 1960
#> b[(Intercept) sample:V28] 0.0 1.0 2020
#> Sigma[sample:(Intercept),(Intercept)] 0.0 1.0 1398
#> log-posterior 0.2 1.0 970
#>
#> For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
Visualise survival curve
# visualise survival curve for each plastic
# plot predictions for each plastic averaged over the random effect of each replicate for each treatment
# hoping this is equivalent to looking at reform=NA in brms and linear mixed effects models
d_preds <- select(d, plastic, sample) %>%
distinct() %>%
mutate(id = 1:n(),
plastic2 = plastic) %>%
nest_legacy(-c(plastic2)) %>%
mutate(., preds = map(
data,
~ posterior_survfit(
mod,
newdata = .x,
times = 0,
standardise = TRUE,
extrapolate = TRUE,
dynamic = TRUE
)
))
d_preds <- unnest(d_preds, preds) %>%
select(-data)
ggplot(d_preds,
aes(
time,
col = plastic2,
fill = plastic2,
group = plastic2
)) +
geom_line(aes(y = median)) +
geom_ribbon(
aes(time, ymin = ci_lb, ymax = ci_ub),
col = NA,
alpha = 0.2,
show.legend = FALSE
) +
ylim(c(0, 1)) +
theme_bw()
Calculate pairwise hazard ratios
# so how do I do contrasts between samples to look at whether some plastics change differently to others...
# 1. compare hazard ratios between all pairs of plastics - credible intervals non-overlapping 1 indicate difference
# 2. calculate survival probability at the end of the experiment (time = 48) for each plastic and non-overlapping 95% credible intervals indicate differences
# get a list of the variables in the model
tidybayes::get_variables(mod)
#> [1] "(Intercept)"
#> [2] "plasticT"
#> [3] "plasticV"
#> [4] "m-splines-coef1"
#> [5] "m-splines-coef2"
#> [6] "m-splines-coef3"
#> [7] "m-splines-coef4"
#> [8] "m-splines-coef5"
#> [9] "m-splines-coef6"
#> [10] "b[(Intercept) sample:L22]"
#> [11] "b[(Intercept) sample:L23]"
#> [12] "b[(Intercept) sample:L24]"
#> [13] "b[(Intercept) sample:L25]"
#> [14] "b[(Intercept) sample:L26]"
#> [15] "b[(Intercept) sample:L27]"
#> [16] "b[(Intercept) sample:L28]"
#> [17] "b[(Intercept) sample:T22]"
#> [18] "b[(Intercept) sample:T23]"
#> [19] "b[(Intercept) sample:T24]"
#> [20] "b[(Intercept) sample:T25]"
#> [21] "b[(Intercept) sample:T26]"
#> [22] "b[(Intercept) sample:T27]"
#> [23] "b[(Intercept) sample:T28]"
#> [24] "b[(Intercept) sample:V22]"
#> [25] "b[(Intercept) sample:V23]"
#> [26] "b[(Intercept) sample:V24]"
#> [27] "b[(Intercept) sample:V25]"
#> [28] "b[(Intercept) sample:V26]"
#> [29] "b[(Intercept) sample:V27]"
#> [30] "b[(Intercept) sample:V28]"
#> [31] "b[(Intercept) sample:_NEW_sample]"
#> [32] "Sigma[sample:(Intercept),(Intercept)]"
#> [33] "accept_stat__"
#> [34] "stepsize__"
#> [35] "treedepth__"
#> [36] "n_leapfrog__"
#> [37] "divergent__"
#> [38] "energy__"
to_plot <- tidybayes::get_variables(mod)[1:6]
# check key mcmc trace plots
mcmc_trace(mod, pars = to_plot)
# extract draws
params <- spread_draws(mod,!!!syms(to_plot)) %>%
janitor::clean_names()
head(params)
#> # A tibble: 6 x 9
#> chain iteration draw intercept plastic_t plastic_v m_splines_coef1
#> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 1 -1.08 1.56 1.62 0.000373
#> 2 1 2 2 -0.632 1.59 1.17 0.00267
#> 3 1 3 3 -0.521 1.57 1.02 0.00120
#> 4 1 4 4 -0.431 1.10 1.19 0.00256
#> 5 1 5 5 -0.000460 0.887 0.874 0.00127
#> 6 1 6 6 -0.607 1.12 1.55 0.000773
#> # … with 2 more variables: m_splines_coef2 <dbl>, m_splines_coef3 <dbl>
# calculate hazard for each plastic not normalised to intercept (plastic_h)
params <- rename(params, plastic_h = intercept) %>%
# calculate hazard for each plastic not normalised to intercept (plastic_h)
mutate(across(plastic_l:plastic_w, function(x) {
x + plastic_h
}))
#> Error: Problem with `mutate()` input `..1`.
#> â„ą `..1 = across(...)`.
#> x Can't subset columns that don't exist.
#> x Column `plastic_l` doesn't exist.
# calculate a specific contrast... plastic_w vs plastic_p
params <-
mutate(
params,
hazard_ratio_v_vs_t = exp(plastic_v - plastic_t),
hazard_ratio_v_vs_l = exp(plastic_v - plastic_l)
)
#> Error: Problem with `mutate()` column `hazard_ratio_v_vs_l`.
#> â„ą `hazard_ratio_v_vs_l = exp(plastic_v - plastic_l)`.
#> x object 'plastic_l' not found
quantile(params$hazard_ratio_p_vs_w, probs = c(0.025, 0.5, 0.975))
#> Warning: Unknown or uninitialised column: `hazard_ratio_p_vs_w`.
#> 2.5% 50% 97.5%
#> NA NA NA
# no difference
quantile(params$hazard_ratio_v_vs_l, probs = c(0.025, 0.5, 0.975))
#> Warning: Unknown or uninitialised column: `hazard_ratio_v_vs_l`.
#> 2.5% 50% 97.5%
#> NA NA NA
# on average a 4 times higher hazard of death
# could do this for as many contrasts as I need
# calculate predicted survival at the end of the experiment
filter(d_preds, time == max(d_preds$time))
#> # A tibble: 3 x 5
#> plastic2 time median ci_lb ci_ub
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 T 48 0.165 0.115 0.224
#> 2 V 48 0.177 0.135 0.238
#> 3 L 48 0.577 0.476 0.672
# non-overlapping estimates mean significant differences in survival probability...
Created on 2021-05-17 by the reprex package (v2.0.0)
PS. I also have the current problem that the models do not work: Running stan_surv() with example data crashes both R and RStudio · Issue #508 · stan-dev/rstanarm · GitHub, but curiously models with random effects do.