Hello,
I’ve created a Stan model to estimate transmission parameters of an infectious disease model (cholera). The model is similar to a standard SIR-style ODE model, with some variations. The model allows for uncertainty in the data by modelling it as the ‘true’ (deterministic) values plus some random noise given as N(0,\sigma). Currently, I am testing the model, so I am feeding it data produced using the same mathematical model given to Stan (a deterministic ODE model with Gaussian N(0,\sigma) noise). This happens even with large numbers of iterations (20,000).
The Stan model runs well, with no warnings. The trace and pair plots all look very healthy, and the model is estimating the transmission parameters consistently and accurately. However, even though I have given provided the Stan model data with low noise (\sigma=1), Stan is consistently estimating a value of \sigma = 567 (see posterior histograms for 6 chains below).
Does anyone have any idea why this might be? And if all I really care about is the transmission parameters, is this even an issue?
(see below for attached R code and Stan code)
R Code
#Set values
N = 10000
I0 = 10
B0 = 0
S0 = N - I0
num_days = 300
M = num_days - 1
ts = seq(0,M)
ts_stan = seq(1,M)
#Initial Values
y0 <- c(S = S0, I = I0, B = B0)
#Parameters to be estimated
theta <- c(
a = 1,
e = 10,
r = 0.2
)
#Force of infection
f <- function(B){
lambda <- B/(B + 10**6)
return(lambda)
}
#Define Diffential Equations of model
SIB_equations <- function(time, variables, parameters){
with(as.list(c(variables, parameters)), {
net_b = -0.33
mu = 0.0001
dS <- mu * (N-S) - a * f(B) * S
dI <- a * f(B) * S - r * I
dB <- B * net_b + e * I
return(list(c(dS, dI, dB)))
})
}
#Analyse DEs
SIB_values <- as.matrix(ode(y = y0, times = ts, SIB_equations, parms = theta))
df <- data.frame(SIB_values)
#Create some noise
sigma = 1
noise <- replicate(3, rnorm(dim(SIB_values)[1], 0, sd = sigma))
#Make sure we don't get any negative values
positive <- function(x){
if (x < 0){y <- 0}
else{y <- x}
return(y)
}
noisy_SIB <- SIB_values[,2:4] + noise
pos_noisy <- matrix(sapply(noisy_SIB, positive),ncol = dim(noisy_SIB)[2])
colnames(pos_noisy) <- c("S_noise", "I_noise", "B_noise")
df <- data.frame(cbind(SIB_values, pos_noisy))
df_stan <- df[-c(1),][c("S_noise", "I_noise", "B_noise")]
#######RUN MCMC ####################
stan_data <- list(M = M, ts = ts_stan, y_init = y0, z_init = y0, y = df_stan)
fit <- stan(file = "codeco_exp.stan", data = stan_data, warmup = 1000, iter = 2000, chains = 4, control = list(adapt_delta = 0.9))
Stan Code
functions{
real f(real B) {
real force;
force = B/(B+1000000);
return force;
}
real[] dz_dt(real t, //time
real[] z, // state
real[] theta, // parameters
real[] x_r, // data (real)
int[] x_i) { // data (integer)
real S = z[1]; //Unpack state variables
real I = z[2];
real B = z[3];
real N = 10000; //Known parameters
real mu = 0.0001;
real net_b = -0.33;
real a = theta[1]; //Unknown parameters
real e = theta[2];
real r = theta[3];
real dS_dt = mu * (N - S) - a * f(B) * S;
real dI_dt = a * f(B) * S - r * I;
real dB_dt = B * net_b + e * I;
return {dS_dt, dI_dt, dB_dt};
}
}
data{
int<lower=0> M; //Number of measurements
real<lower = 0> ts[M]; //Measurement times > 0
real<lower = 0> y_init[3]; //Measured initial conditions
real<lower = 0> z_init[3]; //True initial conditions
real<lower = 0> y[M,3]; //Data
}
parameters{
real<lower=0> theta[3]; //theta = [a,e,r]
real<lower=0> sigma; //Error scale
}
transformed parameters{
real z[M,3]
= integrate_ode_rk45(dz_dt, z_init, 0.0, ts, theta,
rep_array(0.0,0), rep_array(0,0),
1e-6, 1e-5, 1e3);
}
model{
//Priors
theta[1] ~ gamma(3,0.6); //a
theta[2] ~ gamma(10,1); //e
theta[3] ~ gamma(3, 0.06); //r
sigma ~ gamma(2, 0.6); //sigma
for (k in 1:3) {
y_init ~ normal(z_init[k], sigma);
y[,k] ~ normal(z[,k], sigma);
}
}