I’ve got the following reported daily cases of covid-19 for the UK,
library(deSolve)
library(dplyr)
library(ggplot2)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
cases <- c(0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 5, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 2, 5, 3, 13, 4, 11, 34, 30, 48,
43, 67, 48, 61, 74, 0, 342, 342, 0, 403, 407, 676, 63, 1294, 1035,
665, 967, 1427, 1452, 2129, 2885, 2546, 2433, 2619, 3009, 4324, 4244,
4450, 3735, 5903, 3802, 3634, 5491, 4344, 8681, 5233, 5288, 4342,
5252, 4603, 4617, 5599, 5525, 5850, 4676, 4301, 4451, 4583, 5386,
4913, 4463, 4309, 3996, 4076, 6032, 6201, 4806, 4339, 3985, 4406,
6111, 5614, 4649, 3896, 3923)
N = length(cases)
sample_time = 1:N
Note that I added the libraries I am using and other definitions for clarity. I have a system of ODEs similar to SEIR model and I want to estimate the parameters of it based on the data I have in hand (i.e number of cases). I made my stan function as follow:
# The Stan model statement:
cat(
'
functions {
real[] SI(real t,
real[] y,
real[] params,
real[] x_r,
int[] x_i) {
real dydt[8];
dydt[1] = - params[2] * params[1] * y[1] * y[6] - params[1] * y[1] * y[5] - params[1] * y[8] * y[1] - params[3] * y[7] * y[1];
dydt[2] = params[2] * params[1] * y[1] * y[6] + (1 - params[4]) * params[1] * y[1] * y[5] + (1 - params[4]) * params[1] * y[8] * y[1] + (1 - params[4]) * params[3] * y[7] * y[1] - y[2] / params[5];
dydt[3] = params[6] * params[4] * params[1] * y[1] * y[5] + params[7] * params[4] * params[1] * y[8] * y[1] + params[8] * params[4] * params[3] * y[7] * y[1] - y[3] / params[5];
dydt[4] = (1 - params[6]) * params[4] * params[1] * y[1] * y[5] + (1 - params[7]) * params[4] * params[1] * y[8] * y[1] + (1 - params[8]) * params[4] * params[3] * y[7] * y[1] - y[4] / params[5];
dydt[5] = params[9] * y[2] / params[5] - y[5] / params[10];
dydt[6] = (1 - params[9]) * y[2] / params[5] - y[6] / params[11];
dydt[7] = y[3] / params[5] - y[7] / params[12];
dydt[8] = y[4] / params[5] - y[8] / params[13];
return dydt;
}
}
data {
int<lower = 1> n_obs; // Number of days sampled
int<lower = 1> n_params; // Number of model parameters
int<lower = 1> n_difeq; // Number of differential equations in the system
int<lower = 1> n_sample;
int y[n_obs]; // The binomially distributed data
real t0; // Initial time point (zero)
real ts[n_obs]; // Time points that were sampled
}
transformed data {
real x_r[0];
int x_i[0];
}
parameters {
real<lower = 0> params[n_params]; // Model parameters
real<lower = 0, upper = 1> S0; // Initial fraction of susceptible
}
transformed parameters{
real y_hat[n_obs, n_difeq]; // Output from the ODE solver
real y0[n_difeq]; // Initial conditions for both S and I
y0[1] = 1 - 0.02 - 0.001;
y0[2] = 0.02;
y0[3] = 0;
y0[4] = 0;
y0[5] = 0.001;
y0[6] = 0;
y0[7] = 0;
y0[8] = 0;
y_hat = integrate_ode_rk45(SI, y0, t0, ts, params, x_r, x_i);
}
model {
params[1] ~ normal(0.5, 0.9); //beta
params[2] ~ normal(0.2, 0.55); //mu
params[3] ~ normal(0, 1); //beta_m
params[4] ~ normal(0, 1); //phi
params[5] ~ normal(1, 20); //tau
params[6] ~ normal(0, 1); //pe
params[7] ~ normal(0, 1); //pie
params[8] ~ normal(0, 1); //pee
params[9] ~ normal(0.1, 0.99); //alpha
params[10] ~ normal(3, 20); //Ts
params[11] ~ normal(1, 20); //Ta
params[12] ~ normal(1, 20); //Tm
params[13] ~ normal(0.5, 8); //Ti
S0 ~ normal(0.5, 0.5); //constrained to be 0-1.
y ~ binomial(n_sample, y_hat[,5]); //y_hat[,5] are the fractions infected from the ODE solver
}
generated quantities {
real R_0;
R_0 = params[1]*params[11]*(params[9]*params[10]/params[11]*(1-params[4])+params[2]*(1-params[9]));
}
',
file = "SI_fit.stan", sep="", fill=T)
# FITTING
# For stan model we need the following variables:
stan_d = list(n_obs = N,
n_params = 13,
n_difeq = 8,
n_sample = 66650000,
y = cases,
t0 = 0,
ts = sample_time)
# Which parameters to monitor in the model:
params_monitor = c("y_hat", "y0", "params", "R_0")
# Test / debug the model:
test = stan("SI_fit.stan",
data = stan_d,
pars = params_monitor,
chains = 1, iter = 10)
# Fit and sample from the posterior
mod = stan(fit = test,
data = stan_d,
pars = params_monitor,
chains = 3,
warmup = 500,
iter = 1500,
control = list(adapt_delta = 0.99))
# You should do some MCMC diagnostics, including:
#traceplot(mod, pars="lp__")
#traceplot(mod, pars=c("params", "y0"))
#summary(mod)$summary[,"Rhat"]
# Extract the posterior samples to a structured list:
posts <- extract(mod)
there are 8 ODEs and 13 parameters. Note that params[4]
, params[6]
, params[7]
and params[8]
are probabilities and should be between 0 and 1 indeed. My aim is to given the priors estimate the posteriors. Yet when I run this I get two types of errors:
1.Chain 1: Rejecting initial value: Chain 1: Error evaluating the log probability at the initial value. Chain 1: Exception: binomial_lpmf: Probability parameter[3] is -3.6322e-06, but must be in the interval [0, 1] (in 'model25496b50bac3_SI_fit' at line 80)
this happens for other parameters too, essentially my parameters are non-negative yet stan sometimes samples negative ones.
and
Chain 1: Gradient evaluation took 0.00814 seconds Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 81.4 seconds. Chain 1: Adjust your expectations accordingly!
I wonder what I am doing wrong perhaps in making my priors or stan()
. Apologies in advance if my explanation is not good enough but I am a newbie on stan.