Intercept in model summary different to geom_line from posterior draws

Intercept from brms model summary is different to posterior draws plot. What am I missing?

library(tidyverse)
library(brms)
library(tidybayes)
library(cmdstanr)
library(modelr)
setseed(1234)

# adjusted from 
# https://willemsleegers.com/content/posts/18-bayesian-tutorial-simple-regression/bayesian-tutorial-simple-regression.html

options(
  mc.cores = 4,
  brms.threads = 4,
  brms.backend = "cmdstanr",
  brms.file_refit = "on_change"
)

data <- read_csv("https://willemsleegers.com/content/posts/18-bayesian-tutorial-simple-regression/Howell1.csv")
data <- filter(data, age >= 18)

data <- mutate(data, weight_mc = weight - mean(weight))

m1 <- brm(
  data = data,
  height ~ weight_mc,
  family = gaussian,
  prior = c(
    prior(normal(160, 10), class = "Intercept"),
    prior(cauchy(5, 5), class = "sigma"),
    prior(normal(0, 3), class = "b")
  ),
  sample_prior = TRUE,
  seed = 4,
  silent = 2,
  file = "m1.rds"
)

m1

lm(data = data,
   height ~ weight_mc)

weight_mean <- data %>%
  pull(weight) %>%
  mean()

predicted_slopes_qi <- data_grid(data, weight_mc = round(weight_mc)) |>
  add_predicted_draws(m1) |>
  median_qi() |>
  mutate(weight = weight_mc + weight_mean)

pdraw<-data_grid(data, weight_mc = round(weight_mc)) |>
  add_predicted_draws(m1) |>
  mutate(weight = weight_mc + weight_mean)

lm(data = pdraw%>%sample_n(100)%>%mutate(height=.prediction),
   height ~ weight_mc)

linpred=data_grid(data, weight_mc = round(weight_mc)) |>
  add_linpred_draws(m1) |>
  median_qi()|>
mutate(weight = weight_mc + weight_mean)

p1=ggplot() +
  geom_abline(intercept=154.61, slope=0.9, color="red")+
  geom_ribbon(
    aes(ymin = .lower, ymax = .upper, x = weight),
    data = predicted_slopes_qi,
    alpha = .25
  ) +
  geom_line(
    aes(x = weight, y = .prediction),
    data = predicted_slopes_qi
  ) +
  geom_point(
    aes(x = weight, y = .prediction),color="skyblue2",pch="*", size=3,
    data = pdraw%>%sample_n(100)
  ) +
  
  geom_point(
    aes(x = weight, y = height),
    data = data,
    alpha = .25
  ) +
  geom_ribbon(
    aes(ymin = .lower, ymax = .upper, x = weight),color="salmon",
    data = linpred,
    alpha = .5
  )+ 
 labs(x = "Weight", y = "Height")+
  coord_cartesian(xlim = c(0, 60), ylim=c(140,180))
p1

Thanks
Herry

  • brms Version: 2.21.0’
    OS:
    platform x86_64-pc-linux-gnu
    arch x86_64
    os linux-gnu
    system x86_64, linux-gnu
    status
    major 4
    minor 3.2
    year 2023
    month 10
    day 31
    svn rev 85441
    language R
    version.string R version 4.3.2 (2023-10-31)
    nickname Eye Holes

I’ve fit the model and played with it a bit, and I believe the issue is that you fit the model and created the predictions based on the centered variable version weight_mc while in your plot you are sometimes calling for it to plot the original variable weight.

I’ve modified your plotting code to this:

ggplot() +
  geom_abline(intercept=154.61, slope=0.9, color="red")+
  geom_ribbon(
    aes(ymin = .lower, ymax = .upper, x = weight_mc),
    data = predicted_slopes_qi,
    alpha = .25
  ) +
  geom_line(
    aes(x = weight_mc, y = .prediction),
    data = predicted_slopes_qi
  ) +
  geom_point(
    aes(x = weight_mc, y = .prediction),color="skyblue2",pch="*", size=3,
    data = pdraw%>%sample_n(100)
  ) +
  
  geom_point(
    aes(x = weight_mc, y = height),
    data = data,
    alpha = .25
  ) +
  geom_ribbon(
    aes(ymin = .lower, ymax = .upper, x = weight_mc),color="salmon",
    data = linpred,
    alpha = .5
  )+ 
  labs(x = "Weight (centered)", y = "Height")

Which outputs the following plot where the oringal geom_abline() matches up with the model predictions.

2 Likes