Dynamic panel data models with Stan?

Here’s my solution, with some code.

set.seed(123)
delta <- 0.5
sigma_u <- 0.5
sigma_e <- 1
I <- 100
T <- 30
u = rnorm(I, 0, sigma_u)
initial_values <- data_frame(
  individual = 1:I,
  u = u,
  initial_y = u + sigma_e / 
                  sqrt(1 - delta^2) * rnorm(I) # p(y_1 | u, sigma_e, delta)
)


simulated_panel <- initial_values %>%
  group_by(individual) %>% 
  do({
    simulate <- function(x) {
      out <- data.frame(y = rep(NA, T),
                        time = rep(NA, T),
                        u = x$u)
      for(t in 1:T) {
        out$time[t] <- t
        if(t == 1) {
          out$y[t] <- x$initial_y
        } else {
          out$y[t] <- rnorm(1, delta * (out$y[t-1] - x$u) + x$u, sigma_e)
        }
      }
      return(out)
    }
    as.data.frame(simulate(.))
  }) %>% 
  group_by(individual) %>% 
  mutate(lagged_y = lag(y)) %>% ungroup()

y <- matrix(0, T, I)
y_lag <- matrix(0, T, I)
for (i in 1:I) {
  y[,i] <- filter(simulated_panel, individual == i)$y
  y_lag[2:T,i] <- filter(simulated_panel, individual == i)$lagged_y[2:T]
}

stan_dat <- list(I = I, T = T, y = y, y_lag = y_lag)
library(rstan)
mod <- stan_model('dynamic-panel.stan')
fit <- sampling(mod, data = stan_dat, cores = 4, chains = 4, iter = 2000)

Here’s dynamic-panel.stan:

data {
  int<lower=1> I;
  int<lower=1> T;
  matrix[T,I] y;
  matrix[T,I] y_lag;
}
parameters {
  real<lower=0,upper=1> delta_raw;
  real<lower=0> sigma_u;
  real<lower=0> sigma_e;
  vector[I] u_raw;
}
transformed parameters {
  vector[I] u = sigma_u * u_raw;
  real delta = delta_raw * 2 - 1;
}
model {
  real one_minus_delta_sq = (1 - delta) * (1 + delta);
  vector[I] u_delta = u * (1 - delta);
  for (i in 1:I) {
    target += normal_lpdf(y[1,i] | u[i], 
                          sigma_e / sqrt(one_minus_delta_sq));
    target += normal_lpdf(y[2:T,i] | u_delta[i]
                                     + delta * y_lag[2:T,i], sigma_e);
  }
  u_raw ~ normal(0, 1);
  sigma_u ~ normal(0, 2);
  sigma_e ~ normal(0, 1);
  delta_raw ~ beta(4, 4);
}

Summary of the model run over the simulated data from above:

> print(fit , pars = c('delta','sigma_u','sigma_e'))
Inference for Stan model: dynamic-panel.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

        mean se_mean   sd 2.5%  25%  50%  75% 97.5% n_eff Rhat
delta   0.50    0.00 0.04 0.42 0.47 0.50 0.52  0.57   831 1.01
sigma_u 0.36    0.01 0.13 0.05 0.29 0.37 0.45  0.58   430 1.01
sigma_e 1.01    0.00 0.02 0.96 0.99 1.01 1.03  1.06  4000 1.00

Samples were drawn using NUTS(diag_e) at Fri Sep  7 08:03:36 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

When I boost the number of observations per individual to 50 the inference gets sharper for sigma_u and approaches the true value.

Inference for Stan model: dynamic-panel.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

        mean se_mean   sd 2.5%  25%  50%  75% 97.5% n_eff Rhat
delta   0.48       0 0.01 0.46 0.47 0.48 0.49  0.51  4000    1
sigma_u 0.46       0 0.04 0.38 0.43 0.46 0.49  0.55  1400    1
sigma_e 0.99       0 0.01 0.98 0.99 0.99 1.00  1.01  4000    1

Samples were drawn using NUTS(diag_e) at Fri Sep  7 08:17:52 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
1 Like