Help with simple AR(1) model from the manual

I’m trying to write the model from section 10.1 of the manual (page 162) and having some troubles recovering its parameters.

This is my stan code:

data {
  int<lower = 0, upper = 1> run_estimation;               // a switch to evaluate the likelihood
  int<lower=0> N ;
  vector[N] y ;
}

parameters {
  real alpha ;
  real beta ;
  real<lower=0> sigma ;
}

model {
  alpha ~ normal(0,1) ;
  beta ~ normal(0,1) ;
  sigma ~ normal(0,1) ;
  if(run_estimation==1) y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma) ;
}

generated quantities{
  vector[N-1] y_sim ;
  int j=0 ;
  for (n in 2:N){
    j = j + 1 ;
    y_sim[j] = normal_rng(alpha + beta * y[n-1], sigma);
  }
}

First, I generate some fake data to see if I can recover the parameters:

compiled_model_arone <- stan_model("arone.stan")
set.seed(9782)
y <- rnorm(1000)

sim_out <- sampling(compiled_model_arone, data = list(run_estimation = 0,
                                                      N = length(y),
                                                      y = y
                                                      ),
                    seed = 42118)

fake_data_matrix  <- sim_out %>% 
  as.data.frame %>% 
  select(contains("y_sim"))

Then, I try to recover the parameters:

draw <- 82
y_sim <-as.numeric(fake_data_matrix[draw,])

recapture_fit <- sampling(compiled_model_arone, data = list(run_estimation = 1,
                                                      N = length(y_sim),
                                                      y = y_sim
                                                      ),
                    seed = 42118)

alpha <- as.data.frame(recapture_fit, pars = "alpha") 
true_alpha<- as.matrix(sim_out, pars = "alpha")[draw]

ggplot(data = alpha, aes(x = alpha))  + 
  geom_density()  + geom_vline(aes(xintercept = true_alpha),color="red") 

## beta
beta <- as.data.frame(recapture_fit, pars = "beta") 
true_beta <- as.matrix(sim_out, pars = "beta")[draw]

ggplot(data = beta, aes(x = beta))  + 
  geom_density()  + geom_vline(aes(xintercept = true_beta),color="red") 

## sigma
sigma <- as.data.frame(recapture_fit, pars = "sigma") 
true_sigma <- as.matrix(sim_out, pars = "sigma")[draw]

ggplot(data = sigma, aes(x = sigma))  + 
  geom_density()  + geom_vline(aes(xintercept = true_sigma),color="red") 

For example. this is what I get for beta:

So, if I understand @James_Savage’s tutorial correctly, something is wrong with my model.

I also tried plotting y and the posterior mean of y:

y = y_sim[2:length(y_sim)]
y_sim = as.data.frame(recapture_fit, pars = "y_sim") %>% summarise_all(mean) %>% as.numeric()

df.plot <- data.frame(y =y, y_sim = y_sim)  

  
ggplot(data = df.plot, aes(x=y, y=y_sim)) + geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype="dotdash") + 
  xlim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim)) + 
  ylim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim))

What am I missing?

To answer my own question, there is something wrong on how i’m trying to replicate @James_Savage’s method. If instead of following his method to generate fake data, I wite an _rng function, things work well.

In case someone is curious, this is the _rng function that I wrote:

functions {
  matrix  ar_one_rng(int S, real gamma_mean, real gamma_sigma, real sigma, real gamma){
    int N = 100 ;
    matrix[S, N] out ; // olds prior predictive draws
    for (s  in 1:S){
      out[s, 1] = 1.75 ;
      for(n in 2:N){
        out[s, n] = normal_rng(gamma*out[s, n-1], sigma);
      }
    }
    return out;
  }
}

and I’m able to recover all the parameters

library(tidyverse)
library(rstan)

expose_stan_functions('ar_one_rng.stan')
y_sim <- ar_one_rng(S = 4000, gamma_mean = 1, gamma_sigma = 0.25, sigma = 10, gamma = 1.05, seed = 9782)
df.plot <- data.frame(y = y_sim[82,]) %>% mutate(y_m1 = lag(y))

compiled_model_arone <- stan_model("arone.stan")

recapture_fit <- sampling(compiled_model_arone, data = list(run_estimation = 1,
                                                      N = length(df.plot$y),
                                                      y = df.plot$y
                                                      ),
                    seed = 42118)

## beta
beta <- as.data.frame(recapture_fit, pars = "beta") 
true_beta <- 1.05
y_sim = as.data.frame(recapture_fit, pars = "y_sim") %>% summarise_all(mean) %>% as.numeric()

df.plot2 <- data.frame(y = df.plot$y[2:100], y_sim = y_sim)  

  
ggplot(data = df.plot2, aes(x=y, y=y_sim)) + geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype="dotdash") + 
  xlim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim)) + 
  ylim(min(df.plot$y, df.plot$y_sim), max(df.plot$y, df.plot$y_sim))

image

I will try to figure out what I’m missing in the other code over the weekend…

@ignacio In your first attempt, the simulated data uses y (the white noise you’ve provided) as the lagged value, so your simulated data has no real information.

1 Like

Once I fix the simulator it works fine:

1 Like

@James_Savage could you share the fixed code?

Thanks!

In case this is useful for someone, here is the working version (thanks @James_Savage !)

data {
  int<lower = 0, upper = 1> run_estimation;               // a switch to evaluate the likelihood
  int<lower=0> N ;
  vector[N] y ;
}

parameters {
  real alpha ;
  real<lower = -1, upper = 1> beta ;
  real<lower=0> sigma ;
}

model {
  alpha ~ normal(0,1) ;
  beta ~ normal(0,1) ;
  sigma ~ normal(0,1) ;
  if(run_estimation==1) y[2:N] ~ normal(alpha + beta * y[1:(N - 1)], sigma) ;
}

generated quantities{
  vector[N] y_sim ;
  y_sim[1] = y[1] ;
  for (n in 2:N){
    y_sim[n] = normal_rng(alpha + beta * y_sim[n-1], sigma) ;
  }
}

3 Likes