Here’s my solution, with some code.

```
set.seed(123)
delta <- 0.5
sigma_u <- 0.5
sigma_e <- 1
I <- 100
T <- 30
u = rnorm(I, 0, sigma_u)
initial_values <- data_frame(
individual = 1:I,
u = u,
initial_y = u + sigma_e /
sqrt(1 - delta^2) * rnorm(I) # p(y_1 | u, sigma_e, delta)
)
simulated_panel <- initial_values %>%
group_by(individual) %>%
do({
simulate <- function(x) {
out <- data.frame(y = rep(NA, T),
time = rep(NA, T),
u = x$u)
for(t in 1:T) {
out$time[t] <- t
if(t == 1) {
out$y[t] <- x$initial_y
} else {
out$y[t] <- rnorm(1, delta * (out$y[t-1] - x$u) + x$u, sigma_e)
}
}
return(out)
}
as.data.frame(simulate(.))
}) %>%
group_by(individual) %>%
mutate(lagged_y = lag(y)) %>% ungroup()
y <- matrix(0, T, I)
y_lag <- matrix(0, T, I)
for (i in 1:I) {
y[,i] <- filter(simulated_panel, individual == i)$y
y_lag[2:T,i] <- filter(simulated_panel, individual == i)$lagged_y[2:T]
}
stan_dat <- list(I = I, T = T, y = y, y_lag = y_lag)
library(rstan)
mod <- stan_model('dynamic-panel.stan')
fit <- sampling(mod, data = stan_dat, cores = 4, chains = 4, iter = 2000)
```

Here’s `dynamic-panel.stan`

:

```
data {
int<lower=1> I;
int<lower=1> T;
matrix[T,I] y;
matrix[T,I] y_lag;
}
parameters {
real<lower=0,upper=1> delta_raw;
real<lower=0> sigma_u;
real<lower=0> sigma_e;
vector[I] u_raw;
}
transformed parameters {
vector[I] u = sigma_u * u_raw;
real delta = delta_raw * 2 - 1;
}
model {
real one_minus_delta_sq = (1 - delta) * (1 + delta);
vector[I] u_delta = u * (1 - delta);
for (i in 1:I) {
target += normal_lpdf(y[1,i] | u[i],
sigma_e / sqrt(one_minus_delta_sq));
target += normal_lpdf(y[2:T,i] | u_delta[i]
+ delta * y_lag[2:T,i], sigma_e);
}
u_raw ~ normal(0, 1);
sigma_u ~ normal(0, 2);
sigma_e ~ normal(0, 1);
delta_raw ~ beta(4, 4);
}
```

Summary of the model run over the simulated data from above:

```
> print(fit , pars = c('delta','sigma_u','sigma_e'))
Inference for Stan model: dynamic-panel.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
delta 0.50 0.00 0.04 0.42 0.47 0.50 0.52 0.57 831 1.01
sigma_u 0.36 0.01 0.13 0.05 0.29 0.37 0.45 0.58 430 1.01
sigma_e 1.01 0.00 0.02 0.96 0.99 1.01 1.03 1.06 4000 1.00
Samples were drawn using NUTS(diag_e) at Fri Sep 7 08:03:36 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
```

When I boost the number of observations per individual to 50 the inference gets sharper for `sigma_u`

and approaches the true value.

```
Inference for Stan model: dynamic-panel.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
delta 0.48 0 0.01 0.46 0.47 0.48 0.49 0.51 4000 1
sigma_u 0.46 0 0.04 0.38 0.43 0.46 0.49 0.55 1400 1
sigma_e 0.99 0 0.01 0.98 0.99 0.99 1.00 1.01 4000 1
Samples were drawn using NUTS(diag_e) at Fri Sep 7 08:17:52 2018.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
```