Hi all,
I’ve been trying to simplify and modify the model described here, see section “2.4 Viral load and infectiousness time course”.
From what I understand, this is a piecewise linear model with time series data from different individuals. The model is characterized by a steep linear increase followed by a slower linear decrease, with the peak (/intercept) coinciding with day 0. One special characteristic is that only the number of days between an individual’s data points are known but not where exactly they are placed in time, so the model has a shift
parameter for each time-series to estimate its positioning on the x-axis.
I’ve edited the model by removing all non-essential parameters that can affect the slopes and intercept (like age, gender, hospitalisation etc.). Also, I have replaced the imputation of censored data points by integrating them out (see here, section “Integrating out Censored Values”) and have modified some priors slightly.
I am simulating and trying to fit fake data. This works reasonably well (although Rhat values are not that great), however, I have noticed that the model does not place many data points around day 0. You can see this in the plots below, which show, from top to bottom: 1) the estimated trajectories for each individual in blue and the mean trajectory in red, 2) the estimated positioning of the corresponding data points, 3) the true trajectories and 4) the true positioning of the data points.
When I make histograms of the estimated placements in time vs the true ones, you can clearly see the dip around day 0 in the estimation.
I am wondering why this happens and if there is anything I can try to mitigate the problem. Is this just a hard area to fit because of the sudden change from up-slope to down-slope. I.e., small time shifts can lead to big changes in the likelihood? Also, any other critisism of the model is welcome (see stan code below).
Thanks in advance!
data {
// Data for time course analysis
int N_DAY_pos; // number of data points with log10 viral load > 2 (i.e. positive PCRs)
vector<lower=2>[N_DAY_pos] Y_DAY_pos; // outcome log10 viral load with values > 2
vector[N_DAY_pos] X_DAY_pos; // day of measurement in time series for positive PCRs (with day 0 assigned to an individual's first PCR)
int G; // number of individuals
int G_neg; // number of individuals with negative PCR tests (i.e. censored data points)
int gstart_DAY_pos[G]; // first day in time series (positive PCRs) for each individual wrt X_DAY_pos
int gend_DAY_pos[G]; // last day in time series (positive PCRs) for each individual wrt X_DAY_pos
int N_NegTests; // total number of negative test results (i.e. censored data points)
vector[N_NegTests] X_DAY_neg; // day of measurement in time series for negative PCRs (i.e. censored data points)
int N_NegTests_individual[G]; // a vector containing the number of negative PCR tests for each individual
int gstart_DAY_neg[G_neg]; // first day in time series (negative PCRs) for each individual wrt X_DAY_neg
int gend_DAY_neg[G_neg]; // last day in time series (negative PCRs) for each individual wrt X_DAY_neg
int condition_on_data; // 1 for parameter estimation, 0 for prior predictive plots
}
parameters {
real<upper=min(Y_DAY_pos)> L; # limit for censoring
// grand means for key model parameters
real log_slope_up_mu;
real log_slope_down_mu;
real log_intercept_mu;
// individual random effects for key model parameters
real<lower=0> intercept_sigma;
vector[G] intercept_raw;
real<lower=0> slope_up_sigma;
vector[G] slope_up_raw;
real<lower=0> slope_down_sigma;
vector[G] slope_down_raw;
real<lower=0> sigma_sigma;
vector[G] sigma_raw;
// smoothness parameter for logistic weight function
real<lower=5> beta_sweight_mu;
// shifts for individuals
vector<lower=-15,upper=20>[G] shift;
// error variance
real sigma_mu; // mean
}
transformed parameters {
// error variance
vector[G] sigma_sigma_times_raw = sigma_sigma * sigma_raw;
vector[G] sigma = exp(sigma_mu + sigma_sigma_times_raw);
// vectors for individual level parameters
vector<lower=0>[G] intercept;
vector<lower=0>[G] slope_up;
vector<upper=0>[G] slope_down;
vector<lower=0>[G] time2peak;
{ // calculate model parameters
vector[G] intercept_individual = intercept_sigma * intercept_raw;
vector[G] slope_up_individual = slope_up_sigma * slope_up_raw;
vector[G] slope_down_individual = slope_down_sigma * slope_down_raw;
vector[G] log_intercept =
log_intercept_mu + // intercept (fixed effect [FE], group level mean)
intercept_individual; // individual level random effects
vector[G] log_slope_up =
log_slope_up_mu +
slope_up_individual;
vector[G] log_slope_down =
log_slope_down_mu +
slope_down_individual;
// apply link function
intercept = exp(log_intercept);
slope_up = exp(log_slope_up);
slope_down = -exp(log_slope_down);
}
time2peak = intercept ./ slope_up;
}
model {
// DAY
log_intercept_mu ~ normal(2.1,.1);
log_slope_up_mu ~ normal(.8,.25);
log_slope_down_mu ~ normal(-1.5,.5);
beta_sweight_mu ~ normal(10,1);
intercept_sigma ~ gamma(4,10);
intercept_raw ~ std_normal();
slope_up_sigma ~ gamma(4,10);
slope_up_raw ~ std_normal();
slope_down_sigma ~ gamma(4,10);
slope_down_raw ~ std_normal();
sigma_mu ~ std_normal();
sigma_sigma ~ std_normal();
sigma_raw ~ std_normal();
if (condition_on_data == 1) {
// Counter of individuals that contain negative PCR tests (i.e. censored data points).
int g2 = 0;
# iterating over individuals
for (g in 1:G) {
# Positive PCR tests
int N_g = gend_DAY_pos[g] - gstart_DAY_pos[g] + 1; // Number of positive PCRs
vector[N_g] DAY_shifted1 = X_DAY_pos[gstart_DAY_pos[g]:gend_DAY_pos[g]] + shift[g];
vector[N_g] weight_down1 = inv_logit(DAY_shifted1 * beta_sweight_mu);
vector[N_g] est_up1 = intercept[g] + slope_up[g]*DAY_shifted1;
vector[N_g] est_down1 = intercept[g] + slope_down[g]*DAY_shifted1;
vector[N_g] yhat1 = (1-weight_down1) .* est_up1 + weight_down1 .* est_down1;
target += normal_lpdf(Y_DAY_pos[gstart_DAY_pos[g]:gend_DAY_pos[g]] | yhat1, sigma[g]);
// Negative PCR tests (censored data points)
int N_neg_tests = N_NegTests_individual[g];
if (N_neg_tests > 0){
g2 = g2 + 1;
vector[N_neg_tests] DAY_shifted1_neg = X_DAY_neg[gstart_DAY_neg[g2]:gend_DAY_neg[g2]] + shift[g];
vector[N_neg_tests] weight_down1_neg = inv_logit(DAY_shifted1_neg * beta_sweight_mu);
vector[N_neg_tests] est_up1_neg = intercept[g] + slope_up[g]*DAY_shifted1_neg;
vector[N_neg_tests] est_down1_neg = intercept[g] + slope_down[g]*DAY_shifted1_neg;
vector[N_neg_tests] yhat1_neg = (1-weight_down1_neg) .* est_up1_neg + weight_down1_neg .* est_down1_neg;
target += normal_lcdf(rep_vector(L, N_neg_tests) | yhat1_neg, sigma[g]); // integrating out censored data points
}
}
}
}
generated quantities {
real slope_up_mu = exp(log_slope_up_mu);
real slope_down_mu = exp(log_slope_down_mu);
real intercept_mu = exp(log_intercept_mu);
real time2peak_mu = intercept_mu/slope_up_mu;
}