Newbie help with multivariate normal hierarchical ODE - "No distribution 'multi_normal' found"

Hi,

I’m working on fitting an ODE model in Stan, and am hoping to use a multivariate normal prior for the 4 ODE parameters to be fitted.

I’m stumbling my way through the example at https://mc-stan.org/docs/2_24/stan-users-guide/multivariate-hierarchical-priors-section.html, but must admit I’m a bit confused, and I still haven’t gotten my head straight with all the different Stan types.

When trying to compile my current version of the code, I’m getting the error

  210:  
   211:    for(r in 1:nRegions){      
   212:    theta_untransformed[r] ~ multi_normal(gamma, quad_form_diag(Omega, tau));
           ^
   213:    }
   214:    
   -------------------------------------------------

Ill-typed arguments to '~' statement. No distribution 'multi_normal' was found with the correct signature.

If anyone have the time to give a few pointers, that would be much appreciated!

Full model code below.


functions {
  
  
  real[] SEIR(
           real t,       // time
           real[] y,      // state
           real[] theta,  // parameters
           data real[] x_r,    // data (real)
           data int[] x_i) {   // data (integer)
            
            
    
    real dydt[6];
    
    // state
    real S;
    real E;
    real I_symp;
    real I_asymp;
    real R1;
    real R2;
    
    // parameters
    real epsilon;
    real p;
    real theta_local;
    
    
    // fixed parameters for the model
    real p_lower_inf;  // lower infection for asymptomatic ? 
    real eta;  //1 / latency phase 
    real p_symp; // probability of being symptomatic
    real gammaD;  // 1/length of infectiousness
    real gamma_pos;  // 1/length of being positive

    real N;
    real t_today;
    
    
    real b_t;
    
    S = y[1];
    E = y[2];
    I_symp = y[3];
    I_asymp = y[4];
    R1 = y[5];
    R2 = y[6];
    
    
    p_lower_inf= x_r[1];
    eta= x_r[2];
    gammaD= x_r[3];
    gamma_pos= x_r[4];
    
    t_today=x_i[1];
    N = x_i[2];
    
    p=theta[1];
    epsilon=theta[2];
    theta_local=theta[3];
    p_symp= theta[4];

    
    b_t = ((1-p)/(1+exp(-epsilon*((t-t_today))))+p)*theta_local;
      
    
    dydt[1] = -b_t * S * I_symp/N - p_lower_inf*b_t * S * I_asymp/N;
    dydt[2] =  b_t * S * I_symp/N + p_lower_inf*b_t * S * I_asymp/N - eta*E;
    dydt[3] =  p_symp * eta * E      - gammaD * I_symp;
    dydt[4] =  (1 - p_symp)* eta * E - gammaD * I_asymp;
    dydt[5] = gammaD * (I_symp + I_asymp) - gamma_pos * R1;
    dydt[6] =  gamma_pos * R1;
    
    return dydt;
  }
  
}

data {
  int<lower=1> nRegions;
  int<lower=1> maxObs;
  
  int<lower=1> nObs[nRegions]; // number of observations per region
  real i0[nRegions]; // starting (observed) incidence
  
  real  t0[nRegions];    // starting time
  matrix[maxObs,nRegions] ts; //time points for observations
  matrix[maxObs,nRegions] incidence;   // observed incidence values over time

  //int<lower=1> n_pred; //
  //real t_pred[n_pred]; // time points for predictions. 
  
   // fixed parameters for the model
  real p_lower_inf;  // lower infection for asymptomatic ? 
  real eta;  //1 / latency phase 
  real gammaD;  // 1/length of infectiousness
  real gamma_pos;  // 1/length of being positive

  int t_today;  // time point of lockdown
  int N[nRegions];       // Population size
  
  
  
  // Data for surveys 
  
  int n_surveys;
  int survey_counts[2,n_surveys];
  int survey_t[2,n_surveys];
  int survey_regions[n_surveys];
}
transformed data {
  real x_r[0]; 
  int x_i[0];
  vector[4] theta_fixed[nRegions];
  int theta_int[2,nRegions];
    
  for(r in 1:nRegions){
    theta_fixed[r][1] = p_lower_inf;
    theta_fixed[r][2] = eta;
    theta_fixed[r][3] = gammaD;
    theta_fixed[r][4] = gamma_pos;
    
    theta_int[1,r] = t_today;
    theta_int[2,r] = N[r];
  }
  
}
parameters {
    // following https://mc-stan.org/docs/2_19/stan-users-guide/multivariate-hierarchical-priors-section.html 
    
    corr_matrix[4] Omega;
    vector<lower=0>[4] tau;

    vector[4] theta_untransformed[nRegions];
    
    real<lower=0> sigma;
    //vector<lower=0,upper=1>[nRegions] p;        //  for infectivity function
    //vector[nRegions] epsilon;  //  for infectivity function
    //vector[nRegions] theta_local_t;  //  for infectivity function - on the whole real line
    //vector<lower=0,upper=1>[nRegions] p_symp;  // proportion of symptomatic cases
    
  
}

transformed parameters{
  
    vector[4] theta[nRegions];
    

    

  
    vector[6] y0[nRegions]; // starting state of the SEIR
    matrix[6,maxObs] y_hat[nRegions];   // S,E,Is,Ia,R1,R2 values over time
    vector[maxObs] inc_hat[nRegions];
    
    real Pos_hat_surveys[n_surveys];
    
    vector[maxObs] Pos_hat[nRegions];  // estimated number that would test positive in a survey. 
    vector[maxObs] sq_err[nRegions];
    
    matrix[1,4] gamma;           // group coeffs
  
    for(r in 1:nRegions){
    theta[r][1]=inv_logit(theta_untransformed[r][1]); //p
    theta[r][2]=theta_untransformed[r][2]; //epsilon
    theta[r][3]=exp(theta_untransformed[r][3]); //theta_local
    theta[r][4]=inv_logit(theta_untransformed[r][4]); //p_symp
   
   
    y0[1,r]= (N[r] - i0[r]*(1 + (1-theta[r][4])/theta[r][4]));
    y0[2,r] = 0;
    y0[3,r] = i0[r];
    y0[4,r] = i0[r]*(1-theta[r][4])/theta[r][4]; 
    y0[5,r] = 0;
    y0[6,r] = 0;
          
    y_hat[r][1:nObs[r],1:6] = to_matrix(integrate_ode_rk45(SEIR, y0[1:6,r], t0[r], to_array_1d(ts[1:nObs[r],r]), to_array_1d(theta[r]), 
    to_array_1d(theta_fixed[1:2,r]), to_array_1d(theta_int[r])));

    Pos_hat[r][1:nObs[r]]    =  to_vector(y_hat[1:nObs[r],r,3])+ to_vector(y_hat[1:nObs[r],r,4]) +
                                to_vector(y_hat[1:nObs[r],r,5]);
    
    inc_hat[r][1:nObs[r]]= (eta*theta[r][4])*to_vector(y_hat[1:nObs[r],r,2]); 
                                                         
    for(t in 1:nObs[r]){
      sq_err[t,r]=(incidence[r][t] - inc_hat[t,r])^2;
      
    }
    //print(sq_err[,1]);
    
    
    
  }
  for(s in 1:n_surveys){
    Pos_hat_surveys[s] =  mean(Pos_hat[survey_regions[s]][survey_t[1,survey_regions[s]]:survey_t[2,survey_regions[s]]]);
        }
        
        
}
model {
  tau ~ cauchy(0, 2.5);
  Omega ~ lkj_corr(2);
  
   to_vector(gamma) ~ normal(0, 5);

  for(r in 1:nRegions){      
  theta_untransformed[r] ~ multi_normal(gamma, quad_form_diag(Omega, tau));
  }
  
 // epsilon~ normal(0,1);  //  for infectivity function
  //p ~ beta(3,1); //  for infectivity function
  //theta_local~ normal(log(1),log(3));  //  for infectivity function
  
  //p_symp~ beta(1,3); //beta(178,10000);  // proportion of symptomatic cases
 for(r in 1:nRegions){
 
  target += - nObs[r] * log(sum(to_vector(sq_err[r][1:nObs[r]])))/2;  
 }

    for(s in 1:n_surveys){
  survey_counts[1,s] ~ binomial(survey_counts[2,s],Pos_hat_surveys[s]/N[survey_regions[s]]);
  }
  
 
}
generated quantities {
    vector<lower=0,upper=1>[nRegions] p;        //  for infectivity function
    vector[nRegions] epsilon;  //  for infectivity function
    vector[nRegions] theta_local;  //  for infectivity function - on the whole real line
    vector<lower=0,upper=1>[nRegions] p_symp;  // proportion of symptomatic cases
    
    
     p[r]=theta[1,r] ;
     epsilon[r]=theta[2,r] ;
     theta_local[r]=theta[3,r] ;
     p_symp[r]=theta[4,r] ;
    
    //add here
}

You have matrix[1,4] gamma; in the transformed parameters block, but:

  1. multi_normal expects a vector argument for the mean.
  2. You don’t seem to assign anything into the elements of gamma in the transformed parameters block, so it will be full of NA, nonsensical values, or zeros (because it’s not initialised – I can’t remember which of these happens though).

Move gamma to the parameters block if you want to sample it, and change the declaration to vector [4] gamma;

1 Like

Thank you - that solved it! I hadn’t followed through when removing the predictors u_gamma part from the example, to make gamma a vector instead of a matrix.