So there is a way to overcome this and that is in the level of ODEs, everywhere there is S we divide by N which is the population N was 1 now we put the real value for it so 66500000. I have made the following modifications:
On the R side I defined n <- 66500000
then on the stan side I have done the following on the function block:
functions {
real[] SI(real t,
real[] y,
real[] params,
real[] x_r,
int[] x_i) {
real dydt[8];
// Variables of the ODEs
real S = y[1];
real E = y[2];
real G = y[3];
real P = y[4];
real I = y[5];
real A = y[6];
real B = y[7];
real C = y[8];
// The parameters of the ODEs
real phi = params[1];
real pe = params[2];
real pie = params[3];
real pee = params[4];
real beta = params[5];
real mu = params[6];
real betam = params[7];
real tau = params[8];
real alpha = params[9];
real Ts = params[10];
real Ta = params[11];
real Tm = params[12];
real Ti = params[13];
dydt[1] = - (mu * beta * S * A - beta * S * I - beta * C * S - betam * B * S) / n;
dydt[2] = (mu * beta * S * A + (1 - phi) * beta * S * I + (1 - phi) * beta * C * S + (1 - phi) * betam * B * S)/n - E / tau;
dydt[3] = (pe * phi * beta * S * I + pie * phi * beta * C * S + pee * phi * betam * B * S)/n - G / tau;
dydt[4] = ((1 - pe) * phi * beta * S * I + (1 - pie) * phi * beta * C * S + (1 - pee) * phi * betam * B * S)/n - P / tau;
dydt[5] = alpha * E / tau - I / Ts;
dydt[6] = (1 - alpha) * E / tau - A / Ta;
dydt[7] = G / tau - B / Tm;
dydt[8] = P / tau - C / Ti;
return dydt;
}
}
the first 4 ODEs have S so I divide them accordingly by n
. Then in data block I added the following:
data {
int<lower = 0, upper = 66500000> n //population
...
}
where ...
means the rest are the same. Is this correct way of adding the parameter n
?
Then in parameter block I commented out the part I have //real<lower = 0, upper = 1> S0; // Initial fraction of susceptible
and in transformed parameter block I modified the initial conditions as:
transformed parameters{
...
y0[1] = (1 - 0.02 - 0.001)*66500000;
y0[2] = 0.02*66500000;
y0[3] = 0;
y0[4] = 0;
y0[5] = 0.001*66500000;
y0[6] = 0;
y0[7] = 0;
y0[8] = 0;
...
}
now in
stan_d = list(n_obs = length(datalist$cases),
n_params = 13,
n_difeq = 8,
y = datalist$cases,
t0 = 0,
ts = sample_time)
y
are integers. My full R + stan code is:
library(tidyverse)
library(deSolve)
library(dplyr)
library(ggplot2)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
datalist <- read.csv(file = 'xyzz.csv')
ggplot(data = datalist) +
geom_point(mapping = aes(x = date, y = cases)) +
labs(y = "Number of daily cases")
sample_time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116)
n <- 66500000
# The Stan model statement:
cat(
'
functions {
real[] SI(real t,
real[] y,
real[] params,
real[] x_r,
int[] x_i) {
real dydt[8];
// Variables of the ODEs
real S = y[1];
real E = y[2];
real G = y[3];
real P = y[4];
real I = y[5];
real A = y[6];
real B = y[7];
real C = y[8];
// The parameters of the ODEs
real phi = params[1];
real pe = params[2];
real pie = params[3];
real pee = params[4];
real beta = params[5];
real mu = params[6];
real betam = params[7];
real tau = params[8];
real alpha = params[9];
real Ts = params[10];
real Ta = params[11];
real Tm = params[12];
real Ti = params[13];
dydt[1] = - (mu * beta * S * A - beta * S * I - beta * C * S - betam * B * S) / n;
dydt[2] = (mu * beta * S * A + (1 - phi) * beta * S * I + (1 - phi) * beta * C * S + (1 - phi) * betam * B * S)/n - E / tau;
dydt[3] = (pe * phi * beta * S * I + pie * phi * beta * C * S + pee * phi * betam * B * S)/n - G / tau;
dydt[4] = ((1 - pe) * phi * beta * S * I + (1 - pie) * phi * beta * C * S + (1 - pee) * phi * betam * B * S)/n - P / tau;
dydt[5] = alpha * E / tau - I / Ts;
dydt[6] = (1 - alpha) * E / tau - A / Ta;
dydt[7] = G / tau - B / Tm;
dydt[8] = P / tau - C / Ti;
return dydt;
}
}
data {
int<lower = 0, upper = 66500000> n //population
int<lower = 1> n_obs; // Number of days sampled
int<lower = 1> n_params; // Number of model parameters
int<lower = 1> n_difeq; // Number of differential equations in the system
//int<lower = 1> n_sample;
int y[n_obs]; // The binomially distributed data
real t0; // Initial time point (zero)
real ts[n_obs]; // Time points that were sampled
}
transformed data {
real x_r[0];
int x_i[0];
}
parameters {
real<lower = 0, upper = 1> phi;
real<lower = 0, upper = 1> pe;
real<lower = 0, upper = 1> pie;
real<lower = 0, upper = 1> pee;
real<lower = 0> beta;
real<lower = 0> mu;
real<lower = 0>betam;
real<lower = 0> tau;
real<lower = 0> alpha;
real<lower = 0> Ts;
real<lower = 0> Ta;
real<lower = 0> Tm;
real<lower = 0> Ti;
//real<lower = 0, upper = 1> S0; // Initial fraction of susceptible
}
transformed parameters{
real y_hat[n_obs, n_difeq]; // Output from the ODE solver
real y0[n_difeq]; // Initial conditions
real<lower=0, upper=1> params1[4];
real<lower = 0> params2[9];
y0[1] = (1 - 0.02 - 0.001)*66500000;
y0[2] = 0.02*66500000;
y0[3] = 0;
y0[4] = 0;
y0[5] = 0.001*66500000;
y0[6] = 0;
y0[7] = 0;
y0[8] = 0;
y_hat = integrate_ode_rk45(SI, y0, t0, ts, {phi, pe, pie, pee, beta, mu, betam, tau, alpha, Ts, Ta, Tm, Ti}, x_r, x_i);
//for(i in 1:n_obs){
//for(j in 1:n_difeq){
//y_hat[i, j] = 1e-6 + (1-2e-6)*y_hat[i, j];
//}
//}
}
model {
phi ~ normal(0, 1); //phi
pe ~ normal(0, 1); //pe
pie ~ normal(0, 1); //pie
pee ~ normal(0, 1); //pee
beta ~ normal(0.5, 0.9); //beta
mu ~ normal(0.2, 0.55); //mu
betam ~ normal(0, 1); //beta_m
tau ~ normal(1, 20); //tau
alpha ~ normal(0.1, 0.99); //alpha
Ts ~ normal(3, 20); //Ts
Ta ~ normal(1, 20); //Ta
Tm ~ normal(1, 20); //Tm
Ti ~ normal(0.5, 8); //Ti
//S0 ~ normal(0.5, 0.5); //constrained to be 0-1.
y ~ poisson(y_hat[,5]); //y_hat[,5] are the fractions infected from the ODE solver
}
generated quantities {
real R_0;
R_0 = beta * Ta * (alpha * Ts * (1 - phi) / Ta + mu * (1 - alpha));
}
',
file = "SI_fit.stan", sep="", fill=T)
# FITTING
# For stan model we need the following variables:
stan_d = list(n_obs = length(datalist$cases),
n_params = 13,
n_difeq = 8,
y = datalist$cases,
t0 = 0,
ts = sample_time)
# Which parameters to monitor in the model:
params_monitor = c("y_hat", "y0", "params", "R_0")
# Test / debug the model:
test = stan("SI_fit.stan",
data = stan_d,
pars = params_monitor,
chains = 1, iter = 10)
# Fit and sample from the posterior
#mod = stan(fit = test,
# data = stan_d,
# pars = params_monitor,
# chains = 3,
# warmup = 500,
# iter = 1000,
# control = list(adapt_delta = 0.99))
mod = stan(fit = test,
data = stan_d,
pars = params_monitor,
chains = 3,
warmup = 500,
iter = 1000)
# You should do some MCMC diagnostics, including:
#traceplot(mod, pars="lp__")
#traceplot(mod, pars=c("params", "y0"))
#summary(mod)$summary[,"Rhat"]
# Extract the posterior samples to a structured list:
posts <- extract(mod)
which is not running, I wonder if the problem is because I have defined n
on R side, or wether I made a mistake defining it on stan? namely my modification in data block. If I can do this everything would be integer and I should be able to run the naive likelihood I imagine.