Hi,
I am doing a meta-analysis to estimate the sensitivity and specificity of a new test for detection of a disease. Here is an example dataset, which illustrates the structure of the data.
library(tidyverse)
library(brms)
library(tidybayes)
#generate some data for sensitivity and specificity (id is study)
d <- tribble(
  ~id, ~sens, ~sens_lower, ~sens_upper, ~spec, ~spec_lower, ~spec_upper,
  "A", 0.70, 0.47, 0.87, 0.93, 0.87, 0.97,
  "B", 0.80, 0.28, 0.99, 0.50, 0.12, 0.88,
  "C", 0.76, 0.67, 0.84, 0.99, 0.94, 0.99,
  "D", 0.91, 0.85, 0.95, 0.97, 0.85, 0.98,
  "E", 0.84, 0.69, 0.93, 0.92, 0.74, 0.99
)
#plot to confirm okay
d %>%
  pivot_longer(
    cols = -id,
    names_to = c("measure", "type"),
    names_sep = "_"
  ) %>%
  pivot_wider(
    names_from = type,
    values_from = value,
    names_prefix = "value_"
  ) %>%
  rename(value = value_NA) %>%
  ggplot() +
  geom_pointrange(aes(y=id, x=value, xmin=value_lower, xmax=value_upper, colour=measure)) +
  facet_grid(measure~.) +
  scale_y_discrete(limits=rev)
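#(the "Missing pieces" warning below is expected: sens and spec have no "_lower"/"_upper"
# suffix, so their values land in a value_NA column, which is renamed above)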
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 2 rows [1, 4].
Created on 2024-12-10 with reprex v2.1.1
Using the approach described here, we can convert the 95% confidence interval for sensitivity and specificity in each study into a study-specific precision parameter (phi) for a Beta distribution.
mdata <- d %>%
  mutate(se_sens = (sens_upper - sens_lower) / 3.92) %>%
  mutate(se_spec = (spec_upper - spec_lower) / 3.92) %>%
  mutate(derived.phi_sens = (sens / se_sens^2) - (sens / se_sens)^2 - 1) %>%
  mutate(derived.phi_spec = (spec / se_spec^2) - (spec / se_spec)^2 - 1) %>%
  select(id, sens, derived.phi_sens, spec, derived.phi_spec)
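As an optional sanity check (not essential to the workflow), the Beta distribution implied by each (mean, phi) pair can be compared against the reported confidence limits, using the mean/precision parameterisation that brms uses (shape1 = mu * phi, shape2 = (1 - mu) * phi); shown for sensitivity only:
#optional check: implied 95% Beta interval vs the reported CI (sensitivity only)
mdata %>%
  mutate(
    sens_lower_implied = qbeta(0.025, sens * derived.phi_sens, (1 - sens) * derived.phi_sens),
    sens_upper_implied = qbeta(0.975, sens * derived.phi_sens, (1 - sens) * derived.phi_sens)
  ) %>%
  left_join(d %>% select(id, sens_lower, sens_upper), by = "id")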
We can then construct a multilevel meta-analysis model using Beta distributions, with sensitivity and specificity correlated by study:
#Construct meta-analysis model
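#phi is fixed per study via offset() on the identity link (the "0 +" drops the phi
#intercept, so nothing else is estimated for phi); the shared |p| term lets the
#study-level sensitivity and specificity intercepts be correlated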
m1 <- brm(
  bf(sens ~ 1 + (1|p|id), phi ~ 0 + offset(derived.phi_sens)) +
    bf(spec ~ 1 + (1|p|id), phi ~ 0 + offset(derived.phi_spec)),
  data = mdata,
  family = Beta(link_phi = "identity"),
  control = list(adapt_delta = 0.99),
  cores = 4,
  chains = 4,
  iter = 4000
)
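The between-study correlation between the sensitivity and specificity intercepts (the part captured by the shared |p| term) shows up in the group-level summary, or can be pulled from the draws directly; the parameter name below assumes brms's usual cor_<group>__<coef1>__<coef2> naming convention.
#inspect the estimated between-study correlation
summary(m1)
as_draws_df(m1) %>%
  median_qi(cor_id__sens_Intercept__spec_Intercept)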
When we summarise the posteriors, the pooled estimates look “sensible” to me:
#Pooled estimates
add_epred_draws(object = m1,
                newdata = mdata,
                re_formula = NA) %>%
  group_by(.category) %>%
  median_qi(.epred)
#> # A tibble: 2 × 7
#> .category .epred .lower .upper .width .point .interval
#> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 sens 0.815 0.582 0.923 0.95 median qi
#> 2 spec 0.925 0.610 0.984 0.95 median qi
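(The same pooled estimates can also be obtained by back-transforming the population-level intercepts directly; this assumes brms's b_<response>_Intercept naming and the default logit link for mu.)
#equivalent pooled estimates from the population-level intercepts
m1 %>%
  spread_draws(b_sens_Intercept, b_spec_Intercept) %>%
  mutate(sens = plogis(b_sens_Intercept),
         spec = plogis(b_spec_Intercept)) %>%
  median_qi(sens, spec)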
#plot pooled estimates
add_epred_draws(object = m1,
                newdata = mdata,
                re_formula = NA) %>%
  ggplot() +
  stat_halfeye(aes(x=.epred, fill=.category), alpha=0.8) +
  facet_grid(.category~.)
#how well are we recovering study-level estimates
add_epred_draws(object = m1,
                newdata = mdata) %>%
  group_by(id, .category) %>%
  median_qi(.epred) %>%
  full_join(mdata %>% select(id, sens, spec) %>% pivot_longer(-id) %>% rename(.category = name)) %>%
  ggplot() +
  geom_pointrange(aes(y=id, x=.epred, xmin=.lower, xmax=.upper, colour=.category)) +
  geom_point(aes(y=id, x=value, group=.category), colour="black", shape = 4) +
  facet_grid(.category~.) +
  scale_y_discrete(limits=rev)
#> Joining with `by = join_by(id, .category)`
Created on 2024-12-10 with reprex v2.1.1
So, this all seems to work, but I have a couple of questions:
- Does this seem like a reasonable approach, given the data available? Any recommended alternatives?
- I note that when compiling the model, I get the warning "Setting 'rescor' to FALSE by default for this model". AFAIK, it is only possible to use set_rescor(TRUE) with gaussian models (I couldn't get this dev branch discussed by @andrjohns to install). What are the consequences of not addressing this? (See the sketch just below.)
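For reference, a minimal sketch of making that default explicit (as I understand it, this only silences the message rather than changing the model, since residual correlations aren't defined for the Beta family):
#same model formula with rescor set explicitly
f <- bf(sens ~ 1 + (1|p|id), phi ~ 0 + offset(derived.phi_sens)) +
  bf(spec ~ 1 + (1|p|id), phi ~ 0 + offset(derived.phi_spec)) +
  set_rescor(FALSE)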
Thanks!