This is a follow up to my previous question about AR(1) models.
I can recover the parameters of the following simple model:
data {
int<lower = 0, upper = 1> run_estimation; // a switch to evaluate the likelihood
int<lower=0> N ;
vector[N] y ;
}
parameters {
real alpha ;
real<lower = -1, upper = 1> beta ;
real<lower=0> sigma ;
}
model {
alpha ~ normal(0,1) ;
beta ~ normal(0,1) ;
sigma ~ normal(0,1) ;
if(run_estimation==1) y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma) ;
}
generated quantities{
vector[N] y_sim ;
y_sim[1] = y[1] ;
for (n in 2:N){
y_sim[n] = normal_rng(alpha + beta * y_sim[n-1], sigma) ;
}
}
Alas, when I try to plot y
vs y_sim
I get this:
This is the R code that I use to produce the plot:
library(tidyverse)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = 4)
set.seed(9782)
# generate fake data
set.seed(9782)
y <- rnorm(1000)
sim_out <- sampling(model, data = list(run_estimation = 0,
N = length(y),
y = y
),
seed = 42118)
fake_data_matrix <- sim_out %>%
as.data.frame %>%
select(contains("y_sim"))
# Recapture
draw <- 82
y_sim <-as.numeric(fake_data_matrix[draw,])
recapture_fit <- sampling(model, data = list(run_estimation = 1,
N = length(y_sim),
y = y_sim
),
seed = 42118)
# Plot
y = as.numeric(fake_data_matrix[draw,])[2:length(y_sim)]
y_sim = as.data.frame(recapture_fit, pars = "y_sim") %>% summarise_all(mean) %>% as.numeric()
df.plot <- data.frame(y =y, y_sim = y_sim[2:length(y_sim)])
ggplot(data = df.plot, aes(x=y, y=y_sim)) + geom_point() +
geom_abline(intercept = 0, slope = 1, linetype="dotdash") +
xlim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim)) +
ylim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim))
What am I doing wrong? Thanks!
PS: In case it is easier, I also saved this code as a gist