I’m trying to extend the model discussed in this thread to include a discontinuity at time T^*.
This is the model that I want to fit:
where,
In this model i is an index for individuals, j for the group the individual belongs, and t for time [1,50]. Alas, when I try to recover \gamma I end-up with something like this:
This is how I generated the fake data with \gamma=0.5
library(dplyr)
set.seed(123)
delta <- 0.5
gamma <- 0.5
sigma_a <- 0.5
sigma_b <- 0.25
sigma_e <- 1
I <- 100
T <- 50
J <- 10
discontinuity_T <- 25
fake_data <- tibble(individual = 1:I,
group = sample(1:J, I, replace=TRUE))
a <- tibble(individual = 1:I,
a = rnorm(I, 0, sigma_a))
b <- tibble(group = 1:J,
b = rnorm(J, 0, sigma_b))
initial_values <- fake_data %>%
inner_join(a, by = "individual") %>%
inner_join(b, by = "group") %>%
mutate(u = a + b,
initial_y = u +
sigma_e / sqrt(1 - delta^2) * rnorm(I)) # Is this right???
simulated_panel <- initial_values %>%
group_by(individual, group) %>% # Is this right???
do({
simulate <- function(x) {
out <- data.frame(y = rep(NA, T),
time = rep(NA, T),
u = x$u)
for(t in 1:T) {
out$time[t] <- t
if(t == 1) {
out$y[t] <- x$initial_y
} else {
out$y[t] <- rnorm(1, delta * (out$y[t-1] - x$u) + x$u, sigma_e)
}
}
return(out)
}
as.data.frame(simulate(.))
}) %>%
mutate(y = case_when(time < discontinuity_T ~ y,
TRUE ~ y + gamma)) %>%
group_by(individual, group) %>% # Is this right???
mutate(lagged_y = lag(y, default = -99)) %>%
ungroup()
And this is the Stan model that I wrote:
data {
int<lower=1> N; // Number of observations
int<lower=1> I; // Number of individuals
int<lower=1,upper=I> idx_a[N]; // individual index
int<lower=1> G; // Number of groups
int<lower=1,upper=G> idx_b[N]; // group index
int<lower=1> T; // Number of time periors
int<lower=1,upper=T> idx_time[N]; // time index
vector[N] y; // Outcome
vector[N] y_lag; // Lagged outcome
int<lower=2,upper=T-1> T_z; // Period in which the treatment is introduced
}
parameters {
real<lower=0,upper=1> delta_raw;
real<lower=0> sigma_a;
real<lower=0> sigma_b;
real<lower=0> sigma_e;
vector[I] a_raw;
vector[G] b_raw;
real gamma;
}
transformed parameters {
vector[I] a = sigma_a * a_raw; // individual lvl random effects
vector[G] b = sigma_b * b_raw; // group lvl random effects
real delta = delta_raw * 2 - 1; // delta will be between -1 and 1
}
model {
real one_minus_delta_sq = (1 - delta) * (1 + delta);
for(n in 1:N){
if(idx_time[n]==1){
target += normal_lpdf(y[n] | a[idx_a[n]] + b[idx_b[n]],
sigma_e / sqrt(one_minus_delta_sq));
}
else if (idx_time[n] < T_z){
target += normal_lpdf(y[n] | a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]),
sigma_e);
}
else{
target += normal_lpdf(y[n] | a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) +
gamma,
sigma_e);
}
}
gamma ~ std_normal();
a_raw ~ std_normal();
b_raw ~ std_normal();
sigma_a ~ std_normal();
sigma_b ~ std_normal();
sigma_e ~ std_normal();
delta_raw ~ beta(4, 4);
}
generated quantities{
vector[N] y_rep;
vector[N] y_contrafactual;
real one_minus_delta_sq = (1 - delta) * (1 + delta);
for(n in 1:N){
if(idx_time[n]==1){
y_rep[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]],
sigma_e / sqrt(one_minus_delta_sq));
y_contrafactual[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]]+
gamma,
sigma_e / sqrt(one_minus_delta_sq));
}
else if (idx_time[n] < T_z){
y_rep[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]),
sigma_e);
y_contrafactual[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) +
gamma, sigma_e);
}
else{
y_rep[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]) +
gamma,
sigma_e);
y_contrafactual[n] = normal_rng(a[idx_a[n]] +
b[idx_b[n]] +
delta * (y_lag[n] - a[idx_a[n]] - b[idx_b[n]]),
sigma_e);
}
}
}
Finally, here is the code to fit the model and produce the plot for \gamma
stan_dat <- list(N = nrow(simulated_panel),
I = length(unique(simulated_panel$individual)),
idx_a = simulated_panel$individual,
G = length(unique(simulated_panel$group)),
idx_b = simulated_panel$group,
T = length(unique(simulated_panel$time)),
idx_time = simulated_panel$time,
y = simulated_panel$y,
y_lag = simulated_panel$lagged_y,
T_z = discontinuity_T)
fit3 <- sampling(model3,
data = stan_dat,
cores = 6,
chains = 6,
iter = 3000,
control = list(adapt_delta = 0.99))
print(fit3 , pars = c('delta','sigma_a', 'sigma_b','sigma_e', 'gamma'))
mcmc_dens(as.data.frame(fit3), pars = "gamma") +
geom_vline(aes(xintercept = discontinuity), color="red", linetype = "dashed") +
annotate("text", y = 12, x = discontinuity - 0.015, label = "true coef") +
xlim(0.2, 0.55)
Any ideas for why I’m not able to recover \gamma?