I have spent some time looking at the Lotka-Volterra example and tried to work through some of the Exercises and Extensions (btw, it is a great example for beginners).
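For reference, the ODE system in the example is

$$
\frac{du}{dt} = (\alpha - \beta v)\,u, \qquad
\frac{dv}{dt} = (-\gamma + \delta u)\,v,
$$

with $u$ the prey (hare) population and $v$ the predator (lynx) population.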
It was straightforward to extend the predictions into the future by simply extending the time grid passed to the ODE solver. However, the result is not really satisfying because the posterior predictive interval does not become wider over time. I feel that the further away we get from the data, the less confidence we should have in our forecasts.
Is there any approach to incorporate this uncertainty into the model?
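One idea I have been toying with (just a sketch, and I am not sure it is statistically principled) is to inflate the measurement scale with the forecast horizon in the generated quantities block, replacing the sampling loop below with something like this, where tau would be a new positive parameter (or a fixed constant) that is not in my model:

  // sketch only: let the predictive scale grow past the last observation
  // tau is hypothetical here -- it is NOT a parameter of the model below
  for (k in 1:2) {
    for (n in 1:(N + Nf)) {
      real sigma_n = n <= N ? sigma[k] : sigma[k] * sqrt(1 + tau * (n - N));
      y_rep[n, k] = lognormal_rng(log(z[n, k]), sigma_n);
    }
  }

But this only hand-tunes the interval width instead of deriving it from the model, so I wonder whether there is a more principled way, e.g. putting noise on the states themselves rather than only on the measurements.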
Here is my Stan code; not much has changed relative to the original model:
// Lotka-Volterra model with forecasting
functions {
  real[] dz_dt(real t,       // time
               real[] z,     // system state {prey, predator}
               real[] theta, // parameters
               real[] x_r,   // unused data
               int[] x_i) {
    real u = z[1];
    real v = z[2];

    real alpha = theta[1];
    real beta = theta[2];
    real gamma = theta[3];
    real delta = theta[4];

    real du_dt = (alpha - beta * v) * u;
    real dv_dt = (-gamma + delta * u) * v;

    return { du_dt, dv_dt };
  }
}
data {
  int<lower = 0> N;          // number of measurement times
  int<lower = 0> Nf;         // number of periods to forecast
  real ts[N + Nf];           // measurement and forecast times > 0
  real y_init[2];            // initial measured populations
  real<lower = 0> y[N, 2];   // measured populations
}
parameters {
  real<lower = 0> theta[4];  // { alpha, beta, gamma, delta }
  real<lower = 0> z_init[2]; // initial population
  real<lower = 0> sigma[2];  // measurement errors
}
transformed parameters {
  real z[N + Nf, 2]
    = integrate_ode_rk45(dz_dt, z_init, 0, ts, theta,
                         rep_array(0.0, 0), rep_array(0, 0),
                         1e-5, 1e-3, 5e2);
}
model {
  theta[{1, 3}] ~ normal(1, 0.5);
  theta[{2, 4}] ~ normal(0.05, 0.05);
  sigma ~ lognormal(-1, 1);
  z_init ~ lognormal(log(10), 1);
  for (k in 1:2) {
    y_init[k] ~ lognormal(log(z_init[k]), sigma[k]);
    y[ , k] ~ lognormal(log(z[1:N, k]), sigma[k]);
  }
}
generated quantities {
  real y_init_rep[2];
  real y_rep[N + Nf, 2];
  for (k in 1:2) {
    y_init_rep[k] = lognormal_rng(log(z_init[k]), sigma[k]);
    for (n in 1:(N + Nf))
      y_rep[n, k] = lognormal_rng(log(z[n, k]), sigma[k]);
  }
}
And here is my R code:
library(rstan)     # stan_model(), sampling(), extract()
library(reshape2)  # melt()
library(dplyr)     # %>%, filter()
library(ggplot2)   # plotting

N <- length(lynx_hare_df$Year) - 1
y_init <- c(lynx_hare_df$Hare[1], lynx_hare_df$Lynx[1])
y <- as.matrix(lynx_hare_df[2:(N + 1), 2:3])
y <- cbind(y[ , 2], y[ , 1])  # reorder to hare, lynx
Nf <- 50                      # years to forecast
ts2 <- 1:(N + Nf)             # measurement and forecast times
lynx_hare_data_fcast <- list(N = N, Nf = Nf, ts = ts2,
                             y_init = y_init, y = y)
model_fcast <- stan_model("lotka-volterra-forecast.stan")
fit_fcast <- sampling(model_fcast, data = lynx_hare_data_fcast, seed = 123)
print(fit_fcast, pars = c("theta", "sigma", "z_init"),
      probs = c(0.1, 0.5, 0.9), digits = 3)
z_init_draws <- extract(fit_fcast)$z_init
z_draws <- extract(fit_fcast)$z
y_init_rep_draws <- extract(fit_fcast)$y_init_rep
y_rep_draws <- extract(fit_fcast)$y_rep
predicted_pelts <- matrix(NA, 1 + N + Nf, 2)
min_pelts <- matrix(NA, 1 + N + Nf, 2)  # 25% quantile: lower edge of 50% interval
max_pelts <- matrix(NA, 1 + N + Nf, 2)  # 75% quantile: upper edge of 50% interval
for (k in 1:2) {
  predicted_pelts[1, k] <- mean(y_init_rep_draws[ , k])
  min_pelts[1, k] <- quantile(y_init_rep_draws[ , k], 0.25)
  max_pelts[1, k] <- quantile(y_init_rep_draws[ , k], 0.75)
  for (n in 2:(1 + N + Nf)) {
    predicted_pelts[n, k] <- mean(y_rep_draws[ , n - 1, k])
    min_pelts[n, k] <- quantile(y_rep_draws[ , n - 1, k], 0.25)
    max_pelts[n, k] <- quantile(y_rep_draws[ , n - 1, k], 0.75)
  }
}
lynx_hare_melted_df <- melt(as.matrix(lynx_hare_df[ , 2:3]))
colnames(lynx_hare_melted_df) <- c("year", "species", "pelts")
lynx_hare_melted_df$year <- lynx_hare_melted_df$year + 1899  # row 1 is 1900
Nmelt <- dim(lynx_hare_melted_df)[1]
lynx_hare_observe_df <- lynx_hare_melted_df
lynx_hare_observe_df$source <- rep("measurement", Nmelt)
lynx_hare_predict_df <-
  data.frame(year = rep(1900:(1900 + N + Nf), 2),
             species = c(rep("Lynx", 1 + N + Nf), rep("Hare", 1 + N + Nf)),
             pelts = c(predicted_pelts[ , 2], predicted_pelts[ , 1]),
             min_pelts = c(min_pelts[ , 2], min_pelts[ , 1]),
             max_pelts = c(max_pelts[ , 2], max_pelts[ , 1]),
             source = rep("prediction", 2 * (1 + N + Nf)))
lynx_hare_observe_df$min_pelts <- NA  # no ribbon for measurements
lynx_hare_observe_df$max_pelts <- NA
lynx_hare_observe_predict_df <-
  rbind(lynx_hare_observe_df, lynx_hare_predict_df)
population_plot2 <-
  ggplot(data = lynx_hare_observe_predict_df,
         aes(x = year, y = pelts, color = source)) +
  geom_vline(xintercept = 1900, color = "grey") +
  geom_hline(yintercept = 0, color = "grey") +
  facet_wrap(~ species, ncol = 1) +
  geom_ribbon(data = lynx_hare_observe_predict_df %>%
                filter(source == "prediction"),
              aes(ymin = min_pelts, ymax = max_pelts),
              colour = NA, fill = "black", alpha = 0.1) +
  geom_line() +
  geom_point() +
  ylab("pelts (thousands)") +
  ggtheme_tufte() +  # theme helper defined in the case study
  theme(legend.position = "bottom") +
  labs(x = NULL)
population_plot2