Add an unknown discrete parameter following a Bernoulli distribution

Germante · March 14, 2025, 9:10pm

Thank you all for your answers! I’ve now understood how to marginalise latent classes in the posterior with Stan! ;)
The model works very well even with a rather sparse dataset (very fast run, well-mixed chains, unbiased estimates). However, the model doesn’t correspond exactly to what I’d like to model. In fact, I’d like to have the same probability of infection (Pinf) per individual, for all his observations. But with the model you’ve proposed, this Pinf varies from one observation to another within the same individual, which isn’t logical - either the individual is really infected, or he’s not. So I wrote this model to take into account a single Pinf for all the tests of an individual. It runs, but very slowly on the same data sets, with same priors, and I get rather unstable results with more or less significant biases. Not all individuals have the same number of observations. This information is contained in the vector Ntest[n_id], so we only loop over the data for each individual. Could you have a look, it’s likely that I’ve written the model in a way that doesn’t conform to Stan, which could explain the current situation.

functions {
    // Ctfun returns dCt, the delta Ct below Vinf:
    real CtVinffun(real t, real tinf, int lod, int Vinf, real tp, real Tp, real Tc, real Vp){
		if (t <= tinf)
		  return(Vinf); // Ct = 0
        else if (t >= tinf && t <= tp)
          return(Vp*(t-tinf)/Tp); // Viral load rises before peak: Tp = tp-tinf
        else // Viral load falls after peak: (t > tp)
           return(Vp+(lod-Vp)*(t-tp)/Tc); // Tc = tc-tp
    }
}


data{
  int<lower=0> N; // Number of concatenated data points
  int<lower=0> n_id; // Number of individuals 
  int lod; // Limit of detection of Ct (Ct=10)
  int Vinf; // Ct delta from lod at infection (Ct=0) 
  int<lower=0> id[N]; // Vector marking which datum belongs to which id
  real tSS[n_id]; // Vector of individual time of symptom onset 
  int nb_random; // Total number of parameters with a random effect
  int Ntest[n_id]; // Number of observations for each individual
  int max_Ntest; // Number of maximum of repeated test in the population
  matrix[n_id,max_Ntest] time; // array of dim n_id,max_Ntest containing time of observations 
  matrix[n_id, max_Ntest] y;  // array of dim n_id,max_Ntest containing Ct of observations
  matrix[n_id,max_Ntest] censor; // array of dim n_id,max_Ntest containing censor indication
}

transformed data{
    real log_0002 = log(0.0002); // log proba of false positive
    real log_9998 = log(0.9998); // log proba of true negative
    matrix[n_id,max_Ntest] use_obs_ll;  // 0 = lod; 1 = obs
    for(i in 1:n_id){
      for (n in 1:Ntest[i]){
        if(censor[i,n] == 1){
            use_obs_ll[i,n] = 0;
        } else{
            use_obs_ll[i,n] = 1;
        }
      }
    }
}


parameters{
  real mu_Vp; // fixed effect of Vp 
  real<lower=0> mu_Tp; // fixed effect of Tp
  real<lower=0> mu_Tc; // fixed effect of Tc
  real<lower=0, upper=14> mu_Tincub; // fixed effect of Tincub
  real<lower=0> sigma; // additive measurement error
  matrix[n_id,nb_random] eta_tilde; // Gaussian to center random effect to 0
  real<lower=0> eta_omega[nb_random]; // Variance of random effect for Vp, Tp, Tc, and Tincub
  real<lower=0, upper=1> Pinf; // Proportion of infected individuals in the population
}


transformed parameters{
  real<lower=0, upper=14> Tincub[n_id]; // Incubation period cannot exceed 14 days
  matrix[n_id,nb_random] eta; // matrix of random effects for Vp, Tp, Tc, and Tincub
  real Vp[n_id]; // Vector of individual Vp
  real<lower=0> Tp[n_id]; // Vector of individual Tp
  real<lower=0> Tc[n_id]; // Vector of individual Tc
  row_vector[n_id] tp; // Vector of individual tp
  row_vector[n_id] tinf; // Vector of individual tinf
  matrix<upper=50>[n_id,max_Ntest] Ct; // array of dim n_id,N_max_Tests containing Ct pred of each observation
  real num_arg[n_id, max_Ntest, 2]; // array containing individual LL contribution
  num_arg =rep_array(0, n_id, max_Ntest, 2); // Initializing num_arg to 0 (defaut was NaN)
  Ct =  to_matrix(rep_array(0, n_id, max_Ntest)); // // Initializing Ct to 0 (defaut was NaN)
  matrix[n_id,2] ll_Pinf; // matrix of two columns : Pinf and 1-Pinf for each individual
  ll_Pinf[,1] = to_vector(rep_array(log(Pinf), n_id)); // filling the matrix
  ll_Pinf[,2] = to_vector(rep_array(log1m(Pinf), n_id)); // filling the matrix
  for(j in 1:nb_random){
    eta[,j] = eta_tilde[,j]*eta_omega[j];
  }
  Vp = to_array_1d(mu_Vp * exp(eta[ ,1]));
  Tp = to_array_1d(mu_Tp * exp(eta[ ,2]));
  Tc = to_array_1d(mu_Tc * exp(eta[ ,3]));
  Tincub = to_array_1d(mu_Tincub * exp(eta[ ,4]));
  tinf = to_row_vector(tSS) + to_row_vector(Tincub);
  tp = to_row_vector(tinf) + to_row_vector(Tp);
    for(i in 1:n_id){  // Looping on each individual
      for (n in 1:Ntest[i]){  // Each observation of the individual
        Ct[i,n] = CtVinffun(time[i,n], tinf[i], lod, Vinf, tp[i], Tp[i], Tc[i], Vp[i]); // Prediction of the model
        if(time[i,n] < tinf[i]){ // Observations before the start of infection
            if(use_obs_ll[i,n] == 1){  // Data are uncensored
                num_arg[i,n,1] += log_0002 + ll_Pinf[i,1]; // proba of false positive
                num_arg[i,n,2] += log_0002 + ll_Pinf[i,2]; // proba of false positive
            } else{// Data are censored
                num_arg[i,n,1] += log_9998 + ll_Pinf[i,1]; // proba of true negative
                num_arg[i,n,2] += log_9998 + ll_Pinf[i,2]; // proba of true negative
            }
        } else{ // Observations after the start of infection
            if(use_obs_ll[i,n] == 1){ // Data are uncensored
                num_arg[i,n,1] += normal_lpdf(y[i,n] | Ct[i,n], sigma) + ll_Pinf[i,1]; // If infected 
                num_arg[i,n,2] += log_0002 + ll_Pinf[i,2]; // If not infected, proba of false positive 
            } else{ // Data are censored
                num_arg[i,n,1] += normal_lcdf(10 | Ct[i,n], sigma) + ll_Pinf[i,1]; // If infected 
                num_arg[i,n,2] += log_9998 + ll_Pinf[i,2]; // If not infected, proba of true negative 
            }
        }
    }
  }
}

model{
  // Priors //
  mu_Vp ~ normal(25, 0.5); // hierarchical mean (mu)
  mu_Tp ~ normal(6, 0.25); // hierarchical mean (mu)
  mu_Tc ~ normal(15, 0.5); // hierarchical mean (mu)
  mu_Tincub ~ normal(5, 0.25); // hierarchical mean (mu)
  Pinf ~ beta(2,2); // probability to be infected => proportion of infected individuals in the population
  sigma ~ std_normal(); // mesurement error
  to_vector(eta_tilde) ~ std_normal();
  to_vector(eta_omega) ~ normal(0.15,0.05); // variance of random effect 95% of density [0.05;0.25]
  for(i in 1:n_id){ // Likelihood (looped on each observation of each individual)
    for (n in 1:Ntest[i]){
      target += log_sum_exp(num_arg[i,n,1], num_arg[i,n,2]);
    }
  }
}

PS : @Bob_Carpenter I’d like to thank you for all your advice and tips on optimising my code. I’ll look into it, but only once I’ve got a model that works the way I want it to.

Thank you in advance,
Maxime

Topic		Replies	Views
Estimating parameters of a SEIR model Modeling fitting-issues , specification , epidemiology	28	3265	May 23, 2020
NLME model in STAN for missing + unbalanced longitudinal data Modeling rstan , mixed-model , missing-data	20	631	October 21, 2024
Error-in-variables regression with unobserved discrete predictors Modeling specification	11	1339	March 2, 2018
Convergence / latest version of Stan issues with our covid-19 model? Modeling fitting-issues , epidemiology , covid-19	84	5319	May 9, 2020
Issues with running Bayesian hierarchical reinforcement learning model in Stan Modeling rstan , fitting-issues , specification , performance	12	933	June 28, 2021

Add an unknown discrete parameter following a Bernoulli distribution

Related topics