Hello everyone,
I’d like to model the intra-host viral load of each individual in my population. However, within my population, I have infected and uninfected individuals, but I can’t distinguish between them. So I want to estimate the infection status of each of my individuals and the probability of being infected in my population. To do this, I draw a probability of being infected Pinf~ Beta(2,2) and I draw in my infectious status for each observation to_vector(infected) ~ Bernoulli(Pinf).
However, I notice that Stan can’t model discrete parameters and that you have to ‘marginalize discrete unknowns out of the posterior distribution’, but I don’t know how to do that with a process as simple as mine (all the posts I’ve been able to find model a Poisson model, which is much more complex than my problem here).
Thanks in advance for the help!
Maxime
functions {
// Ctfun returns dCt, the delta Ct below Vinf:
real CtVinffun(real t, real tinf, int lod, int Vinf, real tp, real Tp, real Tc, real Vp){
if (t <= tinf)
return(Vinf); // Ct = 0
// Viral load rises before peak:
else if (t >= tinf && t <= tp)
return(Vp*(t-tinf)/Tp); // Tp = tp-tinf
// Viral load falls after peak:
else //(t > tp)
return(Vp+(lod-Vp)*(t-tp)/Tc); // Tc = tc-tp
}
}
data{
int<lower=0> N; // Number of concatenated data points
int<lower=0> n_id; // Number of individuals
int lod; // Limit of detection of Ct (Ct=10)
int Vinf; // Ct delta from lod at infection (Ct=0)
int<lower=0> id[N]; // Vector marking which datum belongs to which id
real t[N]; // Vector marking the time for each data point
real<lower=10, upper=50> y[N]; // Concatenated data vector
int censor[N]; // vector of censor indication
real tSS[n_id]; // Vector of individual time of symptom onset
int nb_random; // Total number of parameters with a random effect
}
transformed data {
real log_is_lod[N]; // log of "is the data is at the LOD value ? : 0 OR 1"
real log_is_obs[N]; // log of " if the data is below the LOD value ? : 0 OR 1"
for(i in 1:N){
if(censor[i]==1){
log_is_obs[i]=log(0);
log_is_lod[i]=log(1);
}
else{
log_is_obs[i]=log(1);
log_is_lod[i]=log(0);
}
}
}
parameters{
// parameters of Vp
real<lower=10, upper=50> mu_Vp;
// parameters of proliferation phase
real<lower=0> mu_Tp;
// parameters of clearance phase
real<lower=0> mu_Tc;
// parameters incubation period
real<lower=0, upper=14> mu_Tincub;
real<lower=0> sigma;
matrix[n_id,nb_random] eta_tilde; // random effect for Vp, Tp, Tc, and Tincub
real<lower=0> eta_omega[nb_random];
// Proportion of infected individuals in the population
real Pinf;
// Infection status for each individual
int infected[n_id];
}
transformed parameters{
real<lower=0, upper=14> Tincub[n_id]; // Incubation period cannot exceed 14 days
matrix[n_id,nb_random] eta; // matrix of random effects for Vp, Tp, Tc, and Tincub
real<lower=10, upper=50> Vp[n_id];
real<lower=0> Tp[n_id];
real<lower=0> Tc[n_id];
real tp[n_id];
real tinf[n_id];
real<upper=50> Ct[N];
real num_arg[N,2];
for(j in 1:nb_random){
eta[,j] = eta_tilde[,j]*eta_omega[j];
}
for(i in 1:n_id){
Vp[i] = mu_Vp*exp(eta[i,1]);
Tp[i] = mu_Tp*exp(eta[i,2]);
Tc[i] = mu_Tc*exp(eta[i,3]);
Tincub[i] = mu_Tincub*exp(eta[i,4]);
tinf[i] = tSS[i] - Tincub[i];
tp[i] = tinf[i] + Tp[i];
}
// Looping on each observation of the data set
for(i in 1:N){
// Prediction of the model
Ct[i] = CtVinffun(t[i], tinf[id[i]], lod, Vinf, tp[id[i]], Tp[id[i]], Tc[id[i]], Vp[id[i]]);
// LL for infected individuals //
if (infected[id[i]] == 1){
if (t[i] < tinf[id[i]]){ // Observations before the start of infection
num_arg[i,1] = log_is_obs[i] + log(0.0002); // likelihood that Ctobs value < LOD (false positive PCR test)
num_arg[i,2] = log_is_lod[i] + log(0.9998); // likelihood that Ctobs value == LOD (true negative PCR test)
} else{
num_arg[i,1] = log_is_obs[i] + normal_lpdf(y[i] | Ct[i], sigma); // likelihood that Ctobs value < LOD (positive PCR test)
num_arg[i,2] = log_is_lod[i] + normal_lcdf(10 | Ct[i], sigma); // likelihood that Ctobs value == LOD (negative PCR test)
}
// If the observation is not at the lod value, num_arg[i,2] is equal to -inf and will not be taken in the likelihood : target += log_sum_exp(num_arg[i]);
// Because log(exp(log(0)) + exp(num_arg[i,1])) = num_arg[i,1]
// LL for non-infected individuals //
} else{
num_arg[i,1] = log_is_obs[i] + log(0.0002); // likelihood that Ctobs value < LOD (false positive PCR test)
num_arg[i,2] = log_is_lod[i] + log(0.9998); // likelihood that Ctobs value == LOD (negative PCR test)
}
}
}
model{
// Priors //
mu_Vp ~ normal(25, 0.5); // hierarchical mean (mu)
mu_Tp ~ normal(6, 0.25); // hierarchical mean (mu)
mu_Tc ~ normal(15, 0.5); // hierarchical mean (mu)
mu_Tincub ~ normal(5, 0.25); // hierarchical mean (mu)
Pinf ~ beta(2,2); // probability to be infected => proportion of infected individuals in the population
to_vector(infected) ~ bernoulli(Pinf); // is the individual is infected
sigma ~ std_normal(); // mesurement error
to_vector(eta_tilde) ~ std_normal();
to_vector(eta_omega) ~ normal(0.15,0.05); // variance of random effect 95% of density [0.05;0.25]
// Likelihood (looped on each observation) //
for(i in 1:N){
target += log_sum_exp(num_arg[i]);
}
}