Hia,
I’ve been playing with @rtrangucci’s model from: Dynamic panel data models with Stan? but have not been able to apply it to a dataset with irregular intervals.
My initial attempt at specifying latent parameters between observations fails spectacularly as the number of missing observations increases. Does anyone have a better suggestion?
Much appreciated,
Andrew
Simulated dataset:
library(tidyverse)
library(rstan)
N_fields <- 3
sigma_p <- 3
p <- rnorm(N_fields, 0, sigma_p)
N_plots <- 235
pop <- sample(N_fields, N_plots, replace = T)
sigma_u = 0.1
u <- rnorm(N_plots, p[pop], sigma_u)
N_obs = 35
N = N_plots * N_obs
N_miss = 20 # change as needed
miss = sample(1:N_obs, N_miss)
N_trt = 7
trt = sample(N_trt, N_plots, replace = T)
beta = rnorm(N_trt, 0, 1)
beta[1] = 0
beta[3] = -4
delta = 0.9
sigma_e = .5
y = matrix(ncol = N_plots, nrow = N_obs)
y[1, ] = beta[trt] + u + rnorm(N_plots) * sigma_e / sqrt(1 - delta^2)
for(i in 2:N_obs) {
mu = beta[trt] + u
y[i, ] = rnorm(N_plots, delta * y[i-1, ] + mu, sigma_e)
}
x <- as.data.frame(y) %>%
rowid_to_column("t") %>%
gather(p, y, -t) %>%
mutate(p = as.numeric(as.factor(p)),
trt = rep(trt, each = N_obs),
pop = rep(pop, each = N_obs),
mis = if_else(t %in% miss, 1, 0),
y = if_else(mis == 1, 0, y)) %>%
arrange(t, p)
ggplot(x, aes(x = t, y = y, group = p)) +
geom_line() +
facet_grid(pop ~ trt)
data_list <- list(
N = N,
N_mis = sum(x$mis),
N_trt = N_trt,
N_grp = 1,
N_plt = N_plots,
N_pop = N_fields,
y_obs = x$y,
missing = x$mis,
trt = x$trt,
plt = x$p,
pop = x$pop
)
str(data_list)
Stan model:
data {
int<lower=1> N; // total
int<lower=0> N_mis; // missing
int<lower=1> N_grp; // types
int<lower=1> N_trt; // contrasts
int<lower=1> N_plt; // experiments
int<lower=1> N_pop; // populations
vector[N] y_obs; // abundance
vector[N] missing; // indicator
int<lower=1, upper=N_trt> trt[N];
int<lower=1, upper=N_plt> plt[N];
int<lower=1, upper=N_pop> pop[N];
}
transformed data{
// lagged response + indices
int N_ini = N_grp * N_plt;
int N_ip1 = N_ini + 1;
int Nm_ini = N - N_ini;
vector[Nm_ini] y_lag = y_obs[1:Nm_ini];
}
parameters{
// missing data
vector[N_mis] y_mis;
// predictors
vector[N_trt - 1] beta;
real mu_b;
real<lower=0> sigma_b;
// correlation
real<lower=0, upper=1> delta_raw;
// random effects
vector[N_pop] p;
vector[N_plt] u_raw;
real<lower=0> sigma_u;
// obs variance
real<lower=0> sigma_e;
}
transformed parameters{
real delta;
vector[N_plt] u;
vector[N] mu;
// non-centered parameters
delta = delta_raw * 2 - 1;
u = u_raw * sigma_u;
{
vector[N_trt] beta_ref = append_row(0.0, beta);
mu = p[pop] + beta_ref[trt] + u[plt];
}
}
model{
real om_dsq = 1 - delta^2;
vector[N] y;
//add missing data
int m = 1;
for(i in 1:N) {
if(missing[i] == 1){
y[i] = y_mis[m];
m = m + 1;
} else {
y[i] = y_obs[i];
}
}
// initial observations
y[1:N_ini] ~ normal(mu[1:N_ini], sigma_e / sqrt(om_dsq));
// remainder include lag term
y[N_ip1:N] ~ normal(delta * y_lag + mu[N_ip1:N], sigma_e);
// priors
beta ~ normal(mu_b, sigma_b);
mu_b ~ normal(0, 1);
sigma_b ~ normal(0, 1);
delta_raw ~ beta(4, 4);
p ~ normal(0, 5);
u_raw ~ normal(0, 1);
sigma_u ~ normal(0, 1);
sigma_e ~ normal(0, 1);
}