I’m reading section 28.11 of the manual and I have the following question. Suppose my data looks something like this:
library(dplyr)
set.seed(9782)
N <- 500
df <- data.frame(x = rnorm(n = N, mean = 10, sd = 2),
e = rnorm(n=N, mean = 0, sd=2)) %>%
mutate(y = 10 + 0.5*x + e) %>% select(-e)
And this is my stan code:
data {
int<lower=1> N ;
vector[N] x ;
vector[N] y ;
}
transformed data {
real mean_y = mean(y);
real<lower=0> sd_y = sd(y);
vector[N] y_std = (y - mean_y)/sd_y;
real mean_x = mean(x);
real<lower=0> sd_x = sd(x);
vector[N] x_std = (x - mean_x)/sd_x;
}
parameters {
real alpha_std;
real beta_std;
real<lower=0> sigma_std;
}
model {
alpha_std ~ normal(0,10);
beta_std ~ normal(0,10);
sigma_std ~ cauchy(0,5);
y ~ normal(alpha_std + beta_std*x_std, sigma_std);
}
generated quantities {
vector[N] y_sim;
vector[N] y_sim2;
vector[N] log_lik;
vector[N] theta;
real alpha = sd_y * (alpha_std - beta_std*mean_x / sd_x) + mean_y;
real beta = beta_std*sd_y/sd_x;
real<lower=0> sigma = sd_y*sigma_std;
for (n in 1:(N)) {
theta[n] = (alpha_std + beta_std*x_std[n])*sd_y + mean_y;
y_sim[n] = normal_rng(alpha + beta*x[n], sigma);
y_sim2[n] = normal_rng(theta[n], sigma);
log_lik[n] = normal_lcdf(y_sim[n] | alpha + beta*x[n], sigma);
}
}
When I try to draw from the posterior things don’t look as I would expect:
library(rstan)
library(bayesplot)
stan_list <- list(N = N, x = df$x, y = df$y)
fit <- sampling(model, data = stan_list)
y_sim <- as.data.frame(fit, pars="y_sim")
ppc_dens_overlay(y = df$y,
yrep = as.matrix(y_sim[sample(x = 1:nrow(y_sim), size = 50),]))
y_sim2 <- as.data.frame(fit, pars="y_sim")
ppc_dens_overlay(y = df$y,
yrep = as.matrix(y_sim[sample(x = 1:nrow(y_sim2), size = 50),]))
What am I missing?
Thanks a lot for the help!