Hello,
My group members and I are trying to replicate a paper which uses MCMC for an assignment. The paper uses stochastic discrete‐time SEIR model to calculate the movement of population between compartment at each time point. Since this is our first time dealing with Stan or Bayesian modelling, we are having some difficulties in understanding where we are going wrong. The paper that we are trying to replicate for our assignment is “https://onlinelibrary.wiley.com/doi/full/10.1111/j.1541-0420.2006.00609.x”.
Our Code:
library(rstan)
data1 <- ebola_kikwit_1995
Cases_Sym_onset <- data1$onset
Cases_Removed <- data1$death
n_days <- length(Cases_Sym_onset)
ncases <- sum(Cases_Sym_onset)
tcontrol=130
N = 5364500
ST= N-ncases
m= ST-5364500
m # final size of the epidemic
S <- vector(mode="numeric", length = 192)
S[1]=5364500 # initial conditions for S
E <- vector(mode="numeric", length = 192)
E[1]= 1 # initial conditions for E
I <- vector(mode="numeric", length = 192)
I[1]= 0 # initial conditions for I
R <- vector(mode="numeric", length = 192)
R[1]= 0 # initial conditions for R
Cases_Infected <- vector(mode="numeric", length = 192)
Cases_Infected[1] <- abs(m) # initial conditions
DataSEIR <- list(n_days = n_days, S0 = S , E0 = E, I0 = I, R0 = R,
N = N, tcon = tcontrol, C = Cases_Sym_onset,
D = Cases_Removed, B0 = Cases_Infected)
data {
int<lower=1> n_days;// total # of time point
int S0[n_days]; //of susceptible individuals at the start of the epidemic
int E0[n_days]; //of exposed individuals at the start of the epidemic
int I0[n_days]; //of infectious individuals at the start of the epidemic
int R0[n_days]; //of removed individuals at the start of the epidemic
int N; //population size
int C[n_days]; // empirical data
int D[n_days]; // empirical data
int tcon; // time point where control measures are introduced
int B0[n_days]; // initial conditions for unobserved entity.
}
parameters {
real<lower=0,upper=1> beta;
real<lower=1/10.5,upper=1/3.5> gamma;
real<lower=1/21.0,upper=1> rho;
real<lower=0> q;
}
transformed parameters{
real theta[4];
theta[1]= beta;
theta[2]= gamma;
theta[3]= rho;
theta[4]= q;
}
model{
int S[n_days]; // # of susceptible individuals
int E[n_days]; // # of exposed individuals
int I[n_days]; // # of infectious individuals
int R[n_days]; // # of removed individuals
real P[n_days]; // probability of leaving S compartment
int B[n_days]; // unobserved entity. # of susceptitble people who become infected
real Beta[n_days];
real pc; //probability of leaving E compartment
real pr; //probability of leaving I compartment
S[1]= S0[1] - B0[1];
E[1]= E0[1] + B0[1] - C[1];
I[1]= I0[1] + C[1] - D[1];
R[1]= N - S[1] - E[1] - I[1];
beta ~ gamma(2,10); // prior
gamma ~ gamma(2,14); // prior
rho ~ gamma(2,10); // prior
q ~ gamma(2,10); // prior
for(t in 2:n_days){
if(t >= tcon){
Beta[t]= beta*exp(-q*(t-tcon));
}
else{
Beta[t]= beta;
}
P[t]= 1 - exp(-(Beta[t]*I[t]/N));
pc = 1 - exp(-rho);
pr = 1- exp(-gamma);
S[t]= S[t-1] - B[t-1];
E[t]= E[t-1] + B[t-1] - C[t-1];
I[t]= I[t-1] + C[t-1] - D[t-1];
R[t] = N - S[t] - E[t] - I[t];
B[t] ~ binomial(S[t],P[t]); //
C[t] ~ binomial(E[t],pc); // # of cases by date of symptom onset
D[t] ~ binomial(I[t],pr); // # of cases removed from infectious class
}
}",
"stan_model1.stan")
stan_model1 <- "stan_model1.stan"
parameter= c("beta","gamma","rho","q")
ini= function(){list(beta = 0.2, gamma = 0.143, rho=0.2, q = 0.2)}
fit_seir <- stan(file=stan_model1,
data = DataSEIR,
pars = parameter,
init = ini,
iter = 1000,
chains = 1)
The values used in the model are according to the paper.
The error that we get is
Chain 1: Rejecting initial value:
Chain 1: Error evaluating the log probability at the initial value.
Chain 1: Exception: binomial_lpmf: Successes variable is -2147483648, but must be in the interval [0, -2142118856] (in 'model4adc26af40d5_stan_model1' at line 63)
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] "Error in sampler$call_sampler(args_list[[i]]) : Initialization failed."
[1] "error occurred during calling the sampler; sampling not done."
Going through posts with similar errors, we do understand that the prior is not initialized or drawn from default rather than the one specified but no matter what changes we make, we get the same error.
If anymore can point us where we are going wrong or, if we are entirely wrong in the modelling would be much appreciated.
Thank you for reading the post.
Best regards.
Update:
I added the initialisation of B in the model as suggested by few comments but I still get the same error.
write( "
data {
int<lower=1> n_days;// total # of time point
int S0[n_days]; //of susceptible individuals at the start of the epidemic
int E0[n_days]; //of exposed individuals at the start of the epidemic
int I0[n_days]; //of infectious individuals at the start of the epidemic
int R0[n_days]; //of removed individuals at the start of the epidemic
int N; //population size
int C[n_days]; // empirical data
int D[n_days]; // empirical data
int tcon; // time point where control measures are introduced
int B0[n_days]; // initial conditions for unobserved entity.
}
parameters {
real<lower=0,upper=1> beta;
real<lower=1/10.5,upper=1/3.5> gamma;
real<lower=1/21.0,upper=1> rho;
real<lower=0> q;
}
transformed parameters{
real theta[4];
theta[1]= beta;
theta[2]= gamma;
theta[3]= rho;
theta[4]= q;
}
model{
int S[n_days]; // # of susceptible individuals
int E[n_days]; // # of exposed individuals
int I[n_days]; // # of infectious individuals
int R[n_days]; // # of removed individuals
real P[n_days]; // probability of leaving S compartment
int B[n_days]; // unobserved entity. # of susceptitble people who become infected
real Beta[n_days];
real pc; //probability of leaving E compartment
real pr; //probability of leaving I compartment
B[1]= B0[1];
S[1]= S0[1] - B0[1];
E[1]= E0[1] + B0[1] - C[1];
I[1]= I0[1] + C[1] - D[1];
R[1]= N - S[1] - E[1] - I[1];
beta ~ gamma(2,10); // prior
gamma ~ gamma(2,14); // prior
rho ~ gamma(2,10); // prior
q ~ gamma(2,10); // prior
for(t in 2:n_days){
if(t >= tcon){
Beta[t]= beta*exp(-q*(t-tcon));
}
else{
Beta[t]= beta;
}
S[t]= S[t-1] - B[t-1];
E[t]= E[t-1] + B[t-1] - C[t-1];
I[t]= I[t-1] + C[t-1] - D[t-1];
R[t] = N - S[t] - E[t] - I[t];
P[t]= 1 - exp(-(Beta[t]*I[t]/N));
pc = 1 - exp(-rho);
pr = 1- exp(-gamma);
B[t] ~ binomial(S[t],P[t]); //
C[t] ~ binomial(E[t],pc); // # of cases by date of symptom onset
D[t] ~ binomial(I[t],pr); // # of cases removed from infectious class
}
}",
"stan_model1.stan")
Error
Chain 1: Rejecting initial value:
Chain 1: Error evaluating the log probability at the initial value.
Chain 1: Exception: binomial_lpmf: Successes variable is -2147483648, but must be in the interval [0, 5363916] (in 'model2d9c5c0523ff_stan_model1' at line 65)
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] "Error in sampler$call_sampler(args_list[[i]]) : Initialization failed."
[1] "error occurred during calling the sampler; sampling not done"