Sorry - I seem to spam the fora today.
I think I’m nearly there with my hierarchical ODE model, but not sure how to start debugging this error message (generated by cmdstan):
Chain 1 Assertion failed!
Chain 1
Chain 1 Program: C:\Repos\Disim\Script\stan\SEIR_fitting_multiregion_hierarchical.exe
Chain 1 File: stan/lib/stan_math/lib/eigen_3.3.3/Eigen/src/Core/DenseCoeffsBase.h, Line 408
Chain 1
Chain 1 Expression: index >= 0 && index < size()
Chain 2 Assertion failed!
C
Any pointers would be much appreciated! Model code below.
functions {
real[] SEIR(
real t, // time
real[] y, // state
real[] theta, // parameters
data real[] x_r, // data (real)
data int[] x_i) { // data (integer)
real dydt[6];
// state
real S;
real E;
real I_symp;
real I_asymp;
real R1;
real R2;
// parameters
real epsilon;
real p;
real theta_local;
// fixed parameters for the model
real p_lower_inf; // lower infection for asymptomatic ?
real eta; //1 / latency phase
real p_symp; // probability of being symptomatic
real gammaD; // 1/length of infectiousness
real gamma_pos; // 1/length of being positive
real N;
real t_today;
real b_t;
S = y[1];
E = y[2];
I_symp = y[3];
I_asymp = y[4];
R1 = y[5];
R2 = y[6];
p_lower_inf= x_r[1];
eta= x_r[2];
gammaD= x_r[3];
gamma_pos= x_r[4];
t_today=x_i[1];
N = x_i[2];
p=theta[1];
epsilon=theta[2];
theta_local=theta[3];
p_symp= theta[4];
b_t = ((1-p)/(1+exp(-epsilon*((t-t_today))))+p)*theta_local;
dydt[1] = -b_t * S * I_symp/N - p_lower_inf*b_t * S * I_asymp/N;
dydt[2] = b_t * S * I_symp/N + p_lower_inf*b_t * S * I_asymp/N - eta*E;
dydt[3] = p_symp * eta * E - gammaD * I_symp;
dydt[4] = (1 - p_symp)* eta * E - gammaD * I_asymp;
dydt[5] = gammaD * (I_symp + I_asymp) - gamma_pos * R1;
dydt[6] = gamma_pos * R1;
return dydt;
}
}
data {
int<lower=1> nRegions;
int<lower=1> maxObs;
int<lower=1> nObs[nRegions]; // number of observations per region
real i0[nRegions]; // starting (observed) incidence
real t0[nRegions]; // starting time
matrix[maxObs,nRegions] ts; //time points for observations
matrix[maxObs,nRegions] incidence; // observed incidence values over time
//int<lower=1> n_pred; //
//real t_pred[n_pred]; // time points for predictions.
// fixed parameters for the model
real p_lower_inf; // lower infection for asymptomatic ?
real eta; //1 / latency phase
real gammaD; // 1/length of infectiousness
real gamma_pos; // 1/length of being positive
int t_today; // time point of lockdown
int N[nRegions]; // Population size
// Data for surveys
int n_surveys;
int survey_counts[2,n_surveys];
int survey_t[2,n_surveys];
int survey_regions[n_surveys];
}
transformed data {
real x_r[0];
int x_i[0];
vector[4] theta_fixed[nRegions];
int theta_int[nRegions,2];
for(r in 1:nRegions){
theta_fixed[r][1] = p_lower_inf;
theta_fixed[r][2] = eta;
theta_fixed[r][3] = gammaD;
theta_fixed[r][4] = gamma_pos;
theta_int[r,1] = t_today;
theta_int[r,2] = N[r];
}
}
parameters {
// following https://mc-stan.org/docs/2_19/stan-users-guide/multivariate-hierarchical-priors-section.html
corr_matrix[4] Omega;
vector<lower=0>[4] tau;
vector[4] theta_untransformed[nRegions];
real<lower=0> sigma;
vector[4] gamma; // group coeffs
//vector<lower=0,upper=1>[nRegions] p; // for infectivity function
//vector[nRegions] epsilon; // for infectivity function
//vector[nRegions] theta_local_t; // for infectivity function - on the whole real line
//vector<lower=0,upper=1>[nRegions] p_symp; // proportion of symptomatic cases
}
transformed parameters{
vector[4] theta[nRegions];
vector[6] y0[nRegions]; // starting state of the SEIR
matrix[maxObs,6] y_hat[nRegions]; // S,E,Is,Ia,R1,R2 values over time
vector[maxObs] inc_hat[nRegions];
real Pos_hat_surveys[n_surveys];
vector[maxObs] Pos_hat[nRegions]; // estimated number that would test positive in a survey.
vector[maxObs] sq_err[nRegions];
for(r in 1:nRegions){
theta[r][1]=inv_logit(theta_untransformed[r][1]); //p
theta[r][2]=theta_untransformed[r][2]; //epsilon
theta[r][3]=exp(theta_untransformed[r][3]); //theta_local
theta[r][4]=inv_logit(theta_untransformed[r][4]); //p_symp
y0[r][1]= (N[r] - i0[r]*(1 + (1-theta[r][4])/theta[r][4]));
y0[r][2] = 0;
y0[r][3] = i0[r];
y0[r][4] = i0[r]*(1-theta[r][4])/theta[r][4];
y0[r][5] = 0;
y0[r][6] = 0;
y_hat[r][1:nObs[r],1:6] = to_matrix(integrate_ode_rk45(SEIR, to_array_1d(y0[r]), t0[r], to_array_1d(ts[1:nObs[r],r]), to_array_1d(theta[r]),
to_array_1d(theta_fixed[r][1:2]), to_array_1d(theta_int[r])));
Pos_hat[r][1:nObs[r]] = to_vector(y_hat[r][1:nObs[r],3])+ to_vector(y_hat[r][1:nObs[r],4]) +
to_vector(y_hat[r][1:nObs[r],5]);
inc_hat[r][1:nObs[r]]= (eta*theta[r][4])*to_vector(y_hat[r][1:nObs[r],2]);
for(t in 1:nObs[r]){
sq_err[t,r]=(incidence[r][t] - inc_hat[r][t])^2;
}
}
for(s in 1:n_surveys){
Pos_hat_surveys[s] = mean(Pos_hat[survey_regions[s]][survey_t[1,survey_regions[s]]:survey_t[2,survey_regions[s]]]);
}
}
model {
tau ~ cauchy(0, 2.5);
Omega ~ lkj_corr(2);
to_vector(gamma) ~ normal(0, 5);
for(r in 1:nRegions){
theta_untransformed[r] ~ multi_normal(gamma, quad_form_diag(Omega, tau));
}
//epsilon~ normal(0,1); // for infectivity function
// p ~ beta(3,1); // for infectivity function
// theta_local~ normal(log(1),log(3)); // for infectivity function
// p_symp~ beta(1,3); //beta(178,10000); // proportion of symptomatic cases
for(r in 1:nRegions){
target += - nObs[r] * log(sum(to_vector(sq_err[r][1:nObs[r]])))/2;
}
for(s in 1:n_surveys){
survey_counts[1,s] ~ binomial(survey_counts[2,s],Pos_hat_surveys[s]/N[survey_regions[s]]);
}
}
generated quantities {
vector<lower=0,upper=1>[nRegions] p; // for infectivity function
vector[nRegions] epsilon; // for infectivity function
vector[nRegions] theta_local; // for infectivity function - on the whole real line
vector<lower=0,upper=1>[nRegions] p_symp; // proportion of symptomatic cases
for(r in 1:nRegions){
p[r]=theta[r][1] ;
epsilon[r]=theta[r][2] ;
theta_local[r]=theta[r][3] ;
p_symp[r]=theta[r][4] ;
}
}
Update: I’ve tried tracing down the error, and for the time being removed all the multivariate normal stuff. Still no luck - same issue as before. Current version of code that causes issues below.
//Taken from https://jrmihalj.github.io/estimating-transmission-by-fitting-mechanistic-models-in-Stan/
functions {
// This largely follows the deSolve package
real[] SEIR(
real t, // time
real[] y, // state
real[] theta, // parameters
data real[] x_r, // data (real)
data int[] x_i) { // data (integer)
real dydt[6];
// state
real S;
real E;
real I_symp;
real I_asymp;
real R1;
real R2;
// parameters
real epsilon;
real p;
real theta_local;
// fixed parameters for the model
real p_lower_inf; // lower infection for asymptomatic ?
real eta; //1 / latency phase
real p_symp; // probability of being symptomatic
real gammaD; // 1/length of infectiousness
real gamma_pos; // 1/length of being positive
real N;
real t_today;
real b_t;
S = y[1];
E = y[2];
I_symp = y[3];
I_asymp = y[4];
R1 = y[5];
R2 = y[6];
p_lower_inf= x_r[1];
eta= x_r[2];
gammaD= x_r[3];
gamma_pos= x_r[4];
t_today=x_i[1];
N = x_i[2];
p=theta[1];
epsilon=theta[2];
theta_local=theta[3];
p_symp= theta[4];
b_t = ((1-p)/(1+exp(-epsilon*((t-t_today))))+p)*theta_local;
dydt[1] = -b_t * S * I_symp/N - p_lower_inf*b_t * S * I_asymp/N;
dydt[2] = b_t * S * I_symp/N + p_lower_inf*b_t * S * I_asymp/N - eta*E;
dydt[3] = p_symp * eta * E - gammaD * I_symp;
dydt[4] = (1 - p_symp)* eta * E - gammaD * I_asymp;
dydt[5] = gammaD * (I_symp + I_asymp) - gamma_pos * R1;
dydt[6] = gamma_pos * R1;
return dydt;
}
}
data {
int<lower=1> nRegions;
int<lower=1> maxObs;
int<lower=1> nObs[nRegions]; // number of observations per region
real i0[nRegions]; // starting (observed) incidence
real t0[nRegions]; // starting time
matrix[maxObs,nRegions] ts; //time points for observations
matrix[maxObs,nRegions] incidence; // observed incidence values over time
//int<lower=1> n_pred; //
//real t_pred[n_pred]; // time points for predictions.
// fixed parameters for the model
real p_lower_inf; // lower infection for asymptomatic ?
real eta; //1 / latency phase
real gammaD; // 1/length of infectiousness
real gamma_pos; // 1/length of being positive
int t_today; // time point of lockdown
int N[nRegions]; // Population size
// Data for surveys
int n_surveys;
int survey_counts[2,n_surveys];
int survey_t[2,n_surveys];
int survey_regions[n_surveys];
}
transformed data {
vector[4] theta_fixed[nRegions];
int theta_int[nRegions,2];
for(r in 1:nRegions){
theta_fixed[r][1] = p_lower_inf;
theta_fixed[r][2] = eta;
theta_fixed[r][3] = gammaD;
theta_fixed[r][4] = gamma_pos;
theta_int[r,1] = t_today;
theta_int[r,2] = N[r];
}
}
parameters {
// following https://mc-stan.org/docs/2_19/stan-users-guide/multivariate-hierarchical-priors-section.html
//cholesky_factor_corr[4] Omega;
vector<lower=0>[4] tau;
//cov_matrix[4] sigma_pars;
real<lower=0> sigma_pars;
vector[4] gamma; // group coeffs
vector[4] theta_untransformed[nRegions];
real<lower=0> sigma;
//vector<lower=0,upper=1>[nRegions] p; // for infectivity function
//vector[nRegions] epsilon; // for infectivity function
//vector[nRegions] theta_local_t; // for infectivity function - on the whole real line
//vector<lower=0,upper=1>[nRegions] p_symp; // proportion of symptomatic cases
}
transformed parameters{
vector[4] theta[nRegions];
vector[6] y0[nRegions]; // starting state of the SEIR
matrix[maxObs,6] y_hat[nRegions]; // S,E,Is,Ia,R1,R2 values over time
vector[maxObs] inc_hat[nRegions];
real Pos_hat_surveys[n_surveys];
vector[maxObs] Pos_hat[nRegions]; // estimated number that would test positive in a survey.
vector[maxObs] sq_err[nRegions];
//vector[4] zeros; //For now, means are zero
//matrix[4,4] identity; //Identity for scaling matrix
//cov_matrix[4] sigma_pars;
//zeros = rep_vector(0, 4);
//identity = diag_matrix(rep_vector(1.0,4));
for(r in 1:nRegions){
theta[r][1]=inv_logit(theta_untransformed[r][1]); //p
theta[r][2]=theta_untransformed[r][2]; //epsilon
theta[r][3]=exp(theta_untransformed[r][3]); //theta_local
theta[r][4]=inv_logit(theta_untransformed[r][4]); //p_symp
y0[r][1]= (N[r] - i0[r]*(1 + (1-theta[r][4])/theta[r][4]));
y0[r][2] = 0;
y0[r][3] = i0[r];
y0[r][4] = i0[r]*(1-theta[r][4])/theta[r][4];
y0[r][5] = 0;
y0[r][6] = 0;
y_hat[r][1:nObs[r],1:6] = to_matrix(integrate_ode_rk45(SEIR, to_array_1d(y0[r]), t0[r], to_array_1d(ts[1:nObs[r],r]), to_array_1d(theta[r]),
to_array_1d(theta_fixed[r][1:2]), to_array_1d(theta_int[r])));
Pos_hat[r][1:nObs[r]] = to_vector(y_hat[r][1:nObs[r],3])+ to_vector(y_hat[r][1:nObs[r],4]) +
to_vector(y_hat[r][1:nObs[r],5]);
inc_hat[r][1:nObs[r]]= (eta*theta[r][4])*to_vector(y_hat[r][1:nObs[r],2]);
for(t in 1:nObs[r]){
sq_err[t,r]=(incidence[r][t] - inc_hat[r][t])^2;
}
//print(sq_err[,1]);
//blah
}
//sigma_pars = diag_pre_multiply(tau, Omega);
for(s in 1:n_surveys){
Pos_hat_surveys[s] = mean(Pos_hat[survey_regions[s]][survey_t[1,survey_regions[s]]:survey_t[2,survey_regions[s]]]);
}
}
model {
//using cholesky decomposition per
//https://discourse.mc-stan.org/t/trouble-with-prior-selection-for-multivariate-normal-inverse-wishart-analysis/6088/13
//sigma_pars ~ inv_wishart(zeros,identity);
//tau ~ cauchy(0, 2.5);
//Omega ~ lkj_corr_cholesky(2);
to_vector(gamma) ~ normal(0, 5);
//print(Omega);
print(tau);
for(r in 1:nRegions){
theta_untransformed[r] ~ normal(gamma, sigma_pars);//multi_normal_cholesky(gamma, sigma_pars);
}
//epsilon~ normal(0,1); // for infectivity function
// p ~ beta(3,1); // for infectivity function
// theta_local~ normal(log(1),log(3)); // for infectivity function
// p_symp~ beta(1,3); //beta(178,10000); // proportion of symptomatic cases
for(r in 1:nRegions){
target += - nObs[r] * log(sum(to_vector(sq_err[r][1:nObs[r]])))/2;
}
for(s in 1:n_surveys){
survey_counts[1,s] ~ binomial(survey_counts[2,s],Pos_hat_surveys[s]/N[survey_regions[s]]);
}
}
generated quantities {
vector<lower=0,upper=1>[nRegions] p; // for infectivity function
vector[nRegions] epsilon; // for infectivity function
vector[nRegions] theta_local; // for infectivity function - on the whole real line
vector<lower=0,upper=1>[nRegions] p_symp; // proportion of symptomatic cases
for(r in 1:nRegions){
p[r]=theta[r][1] ;
epsilon[r]=theta[r][2] ;
theta_local[r]=theta[r][3] ;
p_symp[r]=theta[r][4] ;
}
}
Final edit: solved. It was a stupid index problem ( the row
sq_err[t,r]=(incidence[r][t] - inc_hat[r][t])^2;
should say
sq_err[r][t]=(incidence[t,r] - inc_hat[r][t])^2;
).
Not sure why the error message was so cryptic and without reference to the offending variables, but the old-school approach of commenting out line-by-line worked.