Cannot recover discontinuity parameter in hierarchical panel model

I’m trying to extend the model discussed in this thread to include a discontinuity at time T^*.

This is the model that I want to fit:

Y_{i,j,t} = a_i + b_{j[i]}+ \delta(Y_{i,t-1} - a_i - b_{j[i]}) + \gamma z_t + \epsilon_{i,j,t} \\

where,

z_t = \left\{ \begin{array}{l} 0, & if & t<25 \\ 1, & \textrm{otherwise } \end{array} \right.

In this model i is an index for individuals, j for the group the individual belongs, and t for time [1,50]. Alas, when I try to recover \gamma I end-up with something like this:

This is how I generated the fake data with \gamma=0.5

library(dplyr)
set.seed(123)
delta <- 0.5
gamma <- 0.5

sigma_a <- 0.5
sigma_b <- 0.25
sigma_e <- 1


I <- 100
T <- 50
J <- 10
discontinuity_T <- 25

fake_data <- tibble(individual = 1:I,
                    group = sample(1:J, I, replace=TRUE))

a <- tibble(individual = 1:I,
            a = rnorm(I, 0, sigma_a))
b <- tibble(group = 1:J,
            b = rnorm(J, 0, sigma_b))

initial_values <- fake_data %>% 
  inner_join(a, by = "individual") %>% 
  inner_join(b, by = "group") %>% 
  mutate(u = a + b,
         initial_y = u + 
                     sigma_e / sqrt(1 - delta^2) * rnorm(I))  # Is this right???

simulated_panel <- initial_values %>%
  group_by(individual, group) %>%  # Is this right???
  do({
    simulate <- function(x) {
      out <- data.frame(y = rep(NA, T),
                        time = rep(NA, T),
                        u = x$u)
      for(t in 1:T) {
        out$time[t] <- t
        if(t == 1) {
          out$y[t] <- x$initial_y
        } else {
          out$y[t] <- rnorm(1, delta * (out$y[t-1] - x$u) + x$u, sigma_e)
        }
      }
      return(out)
    }
    as.data.frame(simulate(.))
  }) %>% 
  mutate(y = case_when(time < discontinuity_T ~ y,
                       TRUE ~ y + gamma)) %>% 
  group_by(individual, group) %>%  # Is this right???
  mutate(lagged_y = lag(y, default = -99)) %>% 
  ungroup()

And this is the Stan model that I wrote:


data {
  int<lower=1> N;                         // Number of observations
  int<lower=1> I;                         // Number of individuals
  int<lower=1,upper=I> idx_a[N];          // individual index
  int<lower=1> G;                         // Number of groups
  int<lower=1,upper=G> idx_b[N];          // group index
  int<lower=1> T;                         // Number of time periors
  int<lower=1,upper=T> idx_time[N];       // time index
  vector[N] y;                            // Outcome
  vector[N] y_lag;                        // Lagged outcome
  int<lower=2,upper=T-1> T_z;             // Period in which the treatment is introduced
}

parameters {
  real<lower=0,upper=1> delta_raw;
  real<lower=0> sigma_a;
  real<lower=0> sigma_b;
  real<lower=0> sigma_e;
  vector[I] a_raw;
  vector[G] b_raw;
  real gamma;
}

transformed parameters {
  vector[I] a = sigma_a * a_raw;           // individual lvl random effects
  vector[G] b = sigma_b * b_raw;           // group lvl random effects
  real delta = delta_raw * 2 - 1;          // delta will be between -1 and 1
}

model {
  real one_minus_delta_sq = (1 - delta) * (1 + delta);
  for(n in 1:N){
      if(idx_time[n]==1){
         target += normal_lpdf(y[n] | a[idx_a[n]] + b[idx_b[n]], 
                                      sigma_e / sqrt(one_minus_delta_sq));
      }
      else if (idx_time[n] < T_z){
         target += normal_lpdf(y[n] | a[idx_a[n]] + 
                                      b[idx_b[n]] + 
                                      delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]), 
                                      sigma_e); 
      }
      else{
         target += normal_lpdf(y[n] | a[idx_a[n]] + 
                                      b[idx_b[n]] + 
                                      delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) + 
                                      gamma, 
                                      sigma_e); 
      }
  }
  gamma ~ std_normal();
  a_raw ~ std_normal();
  b_raw ~ std_normal();
  sigma_a ~ std_normal();
  sigma_b ~ std_normal();
  sigma_e ~ std_normal();
  delta_raw ~ beta(4, 4);
}

generated quantities{
    vector[N] y_rep;
    vector[N] y_contrafactual;
    real one_minus_delta_sq = (1 - delta) * (1 + delta);
    for(n in 1:N){
      if(idx_time[n]==1){
         y_rep[n] = normal_rng(a[idx_a[n]] + 
                               b[idx_b[n]], 
                              sigma_e / sqrt(one_minus_delta_sq));
         y_contrafactual[n] = normal_rng(a[idx_a[n]] + 
                                         b[idx_b[n]]+ 
                                         gamma, 
                                         sigma_e / sqrt(one_minus_delta_sq));
      }
      else if (idx_time[n] < T_z){
         y_rep[n] = normal_rng(a[idx_a[n]] + 
                               b[idx_b[n]] + 
                               delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]), 
                               sigma_e);
         y_contrafactual[n] = normal_rng(a[idx_a[n]] + 
                                         b[idx_b[n]] + 
                                         delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) + 
                                         gamma, sigma_e);  
      }
      else{
         y_rep[n] = normal_rng(a[idx_a[n]] + 
                               b[idx_b[n]] + 
                               delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) + 
                               gamma, 
                               sigma_e);
         y_contrafactual[n] = normal_rng(a[idx_a[n]] + 
                                         b[idx_b[n]] + 
                                         delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]), 
                                         sigma_e);  
      }
  }

}

Finally, here is the code to fit the model and produce the plot for \gamma

stan_dat <- list(N = nrow(simulated_panel),
                 I = length(unique(simulated_panel$individual)),
                 idx_a = simulated_panel$individual,
                 G = length(unique(simulated_panel$group)),
                 idx_b = simulated_panel$group,
                 T = length(unique(simulated_panel$time)),
                 idx_time = simulated_panel$time,
                 y = simulated_panel$y,
                 y_lag = simulated_panel$lagged_y,
                 T_z = discontinuity_T)

fit3 <- sampling(model3, 
                 data = stan_dat, 
                 cores = 6, 
                 chains = 6, 
                 iter = 3000, 
                 control = list(adapt_delta = 0.99))

print(fit3 , pars = c('delta','sigma_a', 'sigma_b','sigma_e', 'gamma'))

mcmc_dens(as.data.frame(fit3), pars = "gamma") +
  geom_vline(aes(xintercept = discontinuity), color="red", linetype = "dashed") +
  annotate("text", y = 12, x = discontinuity - 0.015, label = "true coef") +
  xlim(0.2, 0.55)

Any ideas for why I’m not able to recover \gamma?

I’ve not tried to read your model in detail so let me just give the generic advice that, yes, if everything is going well, you should be able to recover your parameters (with appropriate uncertainty) from the data. So, if you’re not able to do so, I recommend stripping down the model until you can recover the parameters, then adding features one by one until you figure out what’s causing the problem.

1 Like

This works:

Generating some fake data:

set.seed(123)

discontinuity_T <- 25
sigma_a <- 0.5
sigma_b <- 0.25
sigma_e <- 1

I <- 100
T <- 50
J <- 10

gammas <- tibble(group = 1:J,
                 gammas = rnorm(J))

groupsXwalk <- tibble(individual = 1:I,
                      group = sample(1:J, I, replace=TRUE)) %>% 
  inner_join(gammas, by = "group")

a <- tibble(individual = 1:I,
            a = rnorm(I, 0, sigma_a))

b <- tibble(group = 1:J,
            b = rnorm(J, 0, sigma_b))

initial_values <- groupsXwalk %>% 
  inner_join(a, by = "individual") %>% 
  inner_join(b, by = "group") %>% 
  mutate(u = a + b,
         initial_y = u + 
                     sigma_e / sqrt(1 - delta^2) * rnorm(I))  # Is this right???

simulated_panel <- initial_values %>%
  group_by(individual) %>%  
  do({
    simulate <- function(x) {
      out <- data.frame(y = rep(NA, T),
                        time = rep(NA, T),
                        a = x$a)
      for(t in 1:T) {
        out$time[t] <- t
        if(t == 1) {
          out$y[t] <- x$initial_y
        } else {
          out$y[t] <- rnorm(1, delta * (out$y[t-1] - x$a) + x$a, sigma_e)
        }
      }
      return(out)
    }
    as.data.frame(simulate(.))
  }) %>% 
  inner_join(groupsXwalk, by = "individual") %>% 
  mutate(y = case_when(time < discontinuity_T ~ y,
                       TRUE ~ y + gammas)) %>% 
  group_by(individual) %>%  
  mutate(lagged_y = lag(y, default = -99, order_by = time)) %>% 
  ungroup()

Stan Model


data {
  int<lower=1> N;                         // Number of observations
  int<lower=1> I;                         // Number of individuals
  int<lower=1,upper=I> idx_a[N];          // individual index
  int<lower=1> G;                         // Number of groups
  int<lower=1,upper=G> idx_b[N];          // group index
  int<lower=1> T;                         // Number of time periors
  int<lower=1,upper=T> idx_time[N];       // time index
  vector[N] y;                            // Outcome
  vector[N] y_lag;                        // Lagged outcome
  int<lower=2,upper=T-1> T_z;             // Period in which the treatment is introduced
}

transformed data{
  vector[T] z;
  for(t  in 1:T){
     if(t<T_z){
       z[t] = 0;
     }
     else{
       z[t] = 1;
     }
  }
}

parameters {
  real<lower=0,upper=1> delta_raw;
  real<lower=0> sigma_a;
  real<lower=0> sigma_b;
  real<lower=0> sigma_gamma;
  real<lower=0> sigma_e;
  vector[I] a_raw;
  vector[G] b_raw;
  vector[G] gamma_raw;
}

transformed parameters {
  vector[I] a = sigma_a * a_raw;                       // individual lvl random effects
  vector[G] b = sigma_b * b_raw;                       // group lvl random effects
  vector[G] gamma = sigma_gamma * gamma_raw;           // group lvl discontinuity
  real delta = delta_raw * 2 - 1;                      // delta will be between -1 and 1
}

model {
  real one_minus_delta_sq = (1 - delta) * (1 + delta);
  for(n in 1:N){
      if(idx_time[n]==1){
         target += normal_lpdf(y[n] | a[idx_a[n]] + b[idx_b[n]], 
                                      sigma_e / sqrt(one_minus_delta_sq));
      }
      else{
         target += normal_lpdf(y[n] | a[idx_a[n]] + b[idx_b[n]] + 
                                      gamma[idx_b[n]] * z[idx_time[n]] + 
                                      delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]] - gamma[idx_b[n]] * z[idx_time[n]-1]), 
                                      sigma_e); 
      }
  }
  a_raw ~ std_normal();
  sigma_a ~ std_normal();
  b_raw ~ std_normal();
  sigma_b ~ std_normal();
  gamma_raw ~ std_normal();
  sigma_gamma ~ std_normal();
  sigma_e ~ std_normal();
  delta_raw ~ beta(4, 4);
}

Fitting and recovering the parameters:

stan_dat <- list(N = nrow(simulated_panel),
                 I = length(unique(simulated_panel$individual)),
                 idx_a = simulated_panel$individual,
                 G = length(unique(simulated_panel$group)),
                 idx_b = simulated_panel$group,
                 T = length(unique(simulated_panel$time)),
                 idx_time = simulated_panel$time,
                 y = simulated_panel$y,
                 y_lag = simulated_panel$lagged_y,
                 T_z = discontinuity_T)

fit4 <- sampling(model4, 
                 data = stan_dat, 
                 cores = 4, 
                 chains = 4, 
                 iter = 2000)

2 Likes