Hi everyone,
I’m currently working on fitting a SEEIIR model to influenza data and making predictions for future time points. I want to ensure that my implementation of the generated quantities
block for prediction aligns with the fit-and-predict methodology recommended by the Stan manual. Below is my code.
functions {
real[] seeiir(real t, real[] y, real[] theta,
real[] x_r, int[] x_i) {
real mu = x_r[1];
real epsilon = x_r[2];
real lambda = x_r[3];
real alpha_max = x_r[4];
int N = x_i[1];
real alpha_min = theta[1];
real t_max = theta[2];
real beta_0 = theta[3];
real e0 = theta[4];
real i0 = theta[5];
real r0 = theta[6];
//real init[6] = {N - (2*i0 + 2*e0 + r0), e0,e0, i0, i0, r0}; // initial values
real sum_init = 0; // Variable to store the sum of elements
// Compute the sum of the elements
//for (i in 1:6) {
// sum_init += init[i];
// }
// print("Sum of elements in init array: ", sum_init);
real S = y[1] ;
real E1 = y[2] ;
real E2 = y[3] ;
real I1 = y[4] ;
real I2 = y[5] ;
real R = y[6] ;
real dS_dt = -beta_0 * 0.5 * ((1 - (alpha_min / alpha_max)) * sin((2 * pi() / 365) * (t - t_max) + pi() / 2) + 1 + (alpha_min / alpha_max)) * (I1 + I2)*S/ N + lambda*R;
real dE1_dt = beta_0 * 0.5 * ((1 - (alpha_min / alpha_max)) * sin((2 * pi() / 365) * (t - t_max) + pi() / 2) + 1 + (alpha_min / alpha_max)) * (I1 + I2)*S/ N - 2*epsilon * E1;
real dE2_dt = 2*epsilon*(E1 -E2);
real dI1_dt = 2*(epsilon * E2 - mu * I1);
real dI2_dt = 2*mu*(I1 -I2);
real dR_dt = 2*mu * I2 - lambda*R;
return {dS_dt, dE1_dt, dE2_dt, dI1_dt, dI2_dt, dR_dt};
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
data {
int<lower=1> n_days;
int<lower=1> n_train_days;
int<lower=1> n_test_days;
real t0;
real ts_train[n_train_days];
real ts_test[n_test_days];
real mu;
real epsilon;
real lambda;
real alpha_max;
int N;
int cases[n_train_days/7];
} // checekd
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
transformed data {
real x_r[4]={mu, epsilon,lambda, alpha_max};
int x_i[1] = { N };
} //checked here are the constant parameters
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
parameters {
real<lower=0> alpha_min;
real<lower=1> t_max;
real<lower=0> beta_0;
real<lower=0> e0; // Common prior for e0_1 and e0_2
real<lower=0> i0; // Common prior for i0_1 and i0_2
real<lower=0> r0;
real<lower=0> phi_inv;
real<lower=0, upper=1> p_reported; // proportion of infected (symptomatic) people reported
} //checked
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
transformed parameters{
real y_train[n_train_days, 6];
real incidence_train[n_train_days];
real aggregated_incidence_train[n_train_days / 7]; // New aggregated incidence array
real phi = 1. / phi_inv;
real theta[6] = {alpha_min, t_max, beta_0, e0, i0, r0}; // Define all the theta parameters
//y_train = integrate_ode_rk45(seeiir, rep_array(0.0, 6), t0, ts_train, theta, x_r, x_i);
y_train = integrate_ode_rk45(seeiir, {N - (2*i0 + 2*e0 + r0), e0,e0, i0, i0, r0}, t0, ts_train, theta, x_r, x_i);
for (i in 1:n_train_days){
incidence_train[i] = 2*epsilon*y_train[i, 3] * p_reported; // the new defition of incidence in
}
//for (i in 1:n_train_days){
// real sum_y = 0.0;
// for (j in 1:6)
// {
// sum_y+=y_train[i,j];
// }
// print("sum_y:", sum_y);
// }
// Aggregate incidence for every seven days
for (j in 1:(n_train_days / 7)) {
int start_index = (j - 1) * 7 + 1;
int end_index = j * 7;
aggregated_incidence_train[j] = sum(incidence_train[start_index:end_index]);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
model {
//priors
alpha_min ~ uniform(0,1);
t_max ~ uniform(1, 365);
beta_0 ~ uniform(0,1);
real upper_limit =N; // Upper limit as 10% of the population
e0 ~ uniform(0, N); // Apply the same prior to e0_1 and e0_2
i0 ~ uniform(0, N); // Apply the same prior to i0_1 and i0_2
r0 ~ uniform(0, N); // Flat prior for r0
phi_inv ~ exponential(5);
p_reported ~ uniform(0,1);
//sampling distribution
cases[1:(n_train_days / 7)] ~ neg_binomial_2(aggregated_incidence_train, phi);
}// checked
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
generated quantities {
real y_test[n_test_days, 6];
real incidence_test[n_test_days];
real aggregated_incidence_test[n_test_days / 7];
real aggregated_incidence[n_days/7];
real pred_cases[n_days / 7];
y_test = integrate_ode_rk45(seeiir, y_train[n_train_days], ts_train[n_train_days], ts_test, theta, x_r, x_i);
for (i in 1:n_test_days) {
incidence_test[i] = 2*epsilon*y_test[i, 3] * p_reported;
}
for (j in 1:(n_test_days / 7)) {
int start_index = (j - 1) * 7 + 1;
int end_index = j * 7;
aggregated_incidence_test[j] = sum(incidence_test[start_index:end_index]);
}
// Copy elements from array a into the new array c
for (i in 1:size(aggregated_incidence_train)) {
aggregated_incidence[i] = aggregated_incidence_train[i];
}
// Copy elements from array b into the new array c, continuing after array a's elements
for (j in 1:size(aggregated_incidence_test)) {
aggregated_incidence[size(aggregated_incidence_train) + j] = aggregated_incidence_test[j];
}
pred_cases = neg_binomial_2_rng(aggregated_incidence, phi);
}
Questions
- Does this approach correctly use the
generated quantities
block to predict future cases based on posterior samples fromy_train
? - Is there a better way to handle
y_test
predictions for higher accuracy?
Thank you for your input and suggestions!
[edit: escaped code]