Hi there,
tidybayes::add_predicted_draws median/mean plot “intercept” looks different to model fit
intercept. It sits (somewhat) above the posterior median_qi/mean_qi values -see attached image (red abline versus grey lines).
Note, updated code for better showing difference
How is this best explained?
note: below code can run very slow - I can provide fit(1,2).rds
# Load necessary libraries
libary(tidyverse)
library(brms)
library(cmdstanr)
library(see)
require(modelr)
require(tidybayes)
require(ggthemes)
require(patchwork)
options(scipen=99999)
options(mc.cores=parallel::detectCores())
set.seed(123)
a_increase <- 0.001
b_increase <- 0.002
noise = rnorm(750, mean = 0, sd = 2)
df <- tibble(
x = rep(c(1.3, 1.4, 1.6, 3.5, 3.6, 5.0, 5.1, 5.2, 5.4, 5.8, 6.0, 6.1, 6.4, 6.5, 6.8, 7.6, 7.7, 7.8, 9.0, 9.5, 9.7, 9.8, 10.1, 10.2, 10.5, 10.6, 10.7, 11.2, 11.4, 11.5), 25),
a = factor(sample(c("a1", "a2", "a3"), 750, replace = TRUE, prob = c(0.20, 0.25, 0.55))),
b = factor(sample(c("b1", "b2"), 750, replace = TRUE, prob = c(0.52, 0.48))),
y = 26 + 0.2 * x + a_increase * as.numeric(a)*runif(750,-1,1) + b_increase * as.numeric(b)*runif(750,-1,1)) %>%
mutate(y = round(y + noise+runif(750,-4,4), 1)) %>%
mutate(y = (y - min(y)) / (max(y) - min(y)) * (38 - 26) + 26)%>%
arrange(x)%>%
mutate(b = factor(if_else(x > 8 & a=="a1", "b2",as.character(b))))%>%
mutate(a = factor(ifelse(x < 4, sample(c("a1", "a2"), n(), replace = TRUE), as.character(a))))
ggplot(df, aes(x=x, y=y))+
geom_point(aes(color=a, fill=b, shape=b))
ggplot(df, aes(x = x, y = y)) +
geom_point(aes(color = a, shape = b,fill=b), size = 1.5) +
scale_color_okabeito() +
theme_minimal()
df$SD=0.23
df$xSD=1
# Fit Bayesian regression model with cmdstanr backend
prs1=c(
prior(cauchy(32,2), class="Intercept"),
prior (student_t(3, 1, 2.5), class="b"),
prior (normal(0,1), class="b", coef="mexxSD"),
prior (normal(0,1), class="sd"),
prior (normal(2,1.25), class="sd", coef="Intercept", group="a"),
# prior (normal(1,1), class="sd", coef="Intercept", group="b"),
prior (cauchy(0,2), class="sigma"),
prior (cauchy(0,3), class = "meanme"),
prior (cauchy(0,3), class = "sdme")
)
fit1 <- brm(y|mi(sdy=SD) ~ me(x,xSD) + (1|a/b), data = df, family = gaussian(), backend = "cmdstanr",
#threads=threading(8),
prior=prs1,
control=list(
adapt_delta=0.9,step_size=0.1,
max_treedepth=18),
file="fit1.rds",
save_latent_dynamics=T,
file_refit="always",
refresh=20
)
prs2=c(
prior(cauchy(32,2), class=b,coef="Intercept"),
prior (student_t(3, 1, 2.5), class="b"),
prior (normal(0,1), class="b", coef="mexxSD"),
prior (normal(0,1), class="sd"),
prior (normal(2,1.25), class="sd", coef="Intercept", group="a"),
# prior (normal(1,1), class="sd", coef="Intercept", group="b"),
prior (cauchy(0,2), class="sigma"),
prior (cauchy(0,3), class = "meanme"),
prior (cauchy(0,3), class = "sdme")
)
fit2 <- brm(y|mi(sdy=SD) ~ 0+Intercept+ me(x,xSD) + (1|a/b), data = df, family = gaussian(), backend = "cmdstanr",
prior=prs2,
control=list(
adapt_delta=0.9,step_size=0.1,
max_treedepth=18),
file="fit2.rds",
save_latent_dynamics=T,
file_refit="always",
refresh=20
)
####################### posteriors
out1=df|>data_grid(x=seq(0,1500, by=100)/100,
a=a,
b=b, SD=0.23,xSD=1) |>
add_predicted_draws(fit1)
out2=df|>data_grid(x=seq(0,1500, by=100)/100,
a=a,
b=b, SD=0.23,xSD=1) |>
add_predicted_draws(fit2)
out=bind_rows(out1%>%mutate(fit="fit1"),
out1%>%mutate(fit="fit2"))
################ plotting
txtsize=14
p1=ggplot(data=out,aes(x))+
stat_lineribbon(data=out,aes(y=.prediction),.width = c(0.95), point_interval = "median_qi",
alpha = 1, color = "grey40", fill="grey80",show.legend = F)+
geom_point(data=df,aes(y=y, x= x), color="black",
shape = 3, size=1,alpha=0.75,
show.legend=F)+
theme_clean(base_size = txtsize)+
ylab("Posterior prediction with (95% CI)")+
xlab("x")+
theme(plot.background = element_rect(
color = "white"
))+scale_x_continuous(breaks=seq(0,12,by=1))+
scale_y_continuous(breaks=seq(26,38,by=1),
expand = c(0, 0)) +
coord_cartesian(xlim = c(0, 12), expand = F, clip = "on")+
labs(subtitle="median_qi") +
geom_abline(slope=summary(fit1)$fixed[2,1], intercept=summary(fit1)$fixed[1,1], color="salmon2")+
facet_grid(fit~.)
p2=ggplot(data=out,aes(x))+
stat_lineribbon(data=out,aes(y=.prediction),.width = c(0.95), point_interval = "mean_qi",
alpha = 1, color = "grey40", fill="grey80",show.legend = F)+
geom_point(data=df,aes(y=y, x= x), color="black",
shape = 3, size=1,alpha=0.75,
show.legend=F)+
theme_clean(base_size = txtsize)+
ylab("Posterior prediction with (95% CI)")+
xlab("x")+
theme(plot.background = element_rect(
color = "white"
))+scale_x_continuous(breaks=seq(0,12,by=1))+
scale_y_continuous(breaks=seq(26,38,by=1),
expand = c(0, 0)) +
coord_cartesian(xlim = c(0, 12), expand = FALSE, clip = "on")+
labs(subtitle="mean_qi") +
geom_abline(slope=summary(fit2)$fixed[2,1], intercept=summary(fit2)$fixed[1,1], color="salmon2")+
facet_grid(fit~.)
p1/p2
fit1 formula: brm(y|mi(sdy=SD) ~ me(x,xSD) + (1|a/b)
fit2 formula: brm(y|mi(sdy=SD) ~ 0+Intercept+me(x,xSD) + (1|a/b)
both model fits show marked visual difference between model intercept (red line) and add_predicted_draw median/mean (dark line)
- Operating System:
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.4 LTS
packageVersion(“brms”)
[1] ‘2.21.0’