Low E-BFMI: Reparameterization causing problems (low ESS, high R-hat)

I am adapting a model from this paper to model a different, but related, set of data.

I’m getting a low E-BFMI warning (0.01), which I think is coming from my sd parameters:

energy_dpsd

This is the code for my first model:

functions {
  // mufun returns dCt, the delta Ct below the LOD:
  real mufun(real t, real tp, real wp, real wr, real dp){
    // Viral load rises between onset and peak: 
    if(t>(tp-wp) && t<=tp)
      return(dp/wp*(t-(tp-wp)));
    // Viral load falls between peak and recovery: 
    else if(t>tp && t<=(tp+wr))
      return(dp - dp/wr*(t-tp));
    // Ct = lod after recovery: 
    else 
      return(0);
  }
}

data {
   int<lower=0> N;                        // Number of concatenated data points
   int<lower=0> n_id;                     // Number of individuals 
   real<lower=0> lod;                     // Limit of detection
   array[N] int<lower=0> id;             // Vector marking which datum belongs to which id
   array[n_id] int<lower=0> symp;        // Vector marking symptomatic ids
   vector[N] t;                           // Vector marking the time for each data point 
   vector<lower=0, upper=lod>[N] y;       // Concatenated data vector 
   real<lower=0> tpsd;                    // Prior sd for the onset time (days)
   real<lower=0> dpmean_prior;            // Prior mean peak Ct (delta from lod)
   real<lower=0> dpsd_prior;              // Prior sd peak Ct (delta from lod)
   real<lower=0> wpmax;                   // Max proliferation duration
   real<lower=0> wpmean_prior;            // Prior mean proliferation duration
   real<lower=0> wpsd_prior;              // Prior sd proliferation duration
   real<lower=0> wrmax;                   // Prior max clearance duration
   real<lower=0> wrmean_prior;            // Prior mean clearance duration 
   real<lower=0> wrsd_prior;              // Prior sd clearance duration 
   real<lower=0> sigma_max;               // Max allowable value for observation noise
   real<lower=0> sigma_prior_scale;       // Prior observation noise Cauchy scale
   real<lower=0, upper=1> lambda;         // Mixture probability (~1-sensitivity)
   real<lower=0> fpmean;                  // False positive mean Ct
}

transformed data {
   vector<lower=0, upper=lod>[N] ydrop;  // Concatenated deviation from LOD 

   real loglambda;
   real log1mlambda;

   real dpcauchypriorscale;
   real wpcauchypriorscale;
   real wrcauchypriorscale;

   for(i in 1:N){
     ydrop[i] = lod-y[i];
   }

   loglambda = log(lambda);
   log1mlambda = log1m(lambda);

  // Define cauchy prior scales so thatt 90% of the half-distribution mass lies below the max cutoff for that parameter. 
   //dpcauchypriorscale = lod/tan(pi()*(0.95-0.5));
   //wpcauchypriorscale = wpmax/tan(pi()*(0.95-0.5));
   //wrcauchypriorscale = wrmax/tan(pi()*(0.95-0.5));
   
   dpcauchypriorscale = 3;
   wpcauchypriorscale = 3;
   wrcauchypriorscale = 3;

}

parameters {

   real<lower=0, upper=lod> dpmean;    // Poplation peak Ct drop mean
   real<lower=0, upper=wpmax> wpmean;  // Population onset-to-peak time mean
   real<lower=0, upper=wrmax> wrmean;  // Population peak-to-recovery time mean 

   real<lower=0> dpsd;          // Poplation peak Ct drop sd
   real<lower=0> wpsd;          // Population onset-to-peak time sd
   real<lower=0> wrsd;          // Population peak-to-recovery time sd

   vector[n_id] tp;                        // Peak time
   vector<lower=0, upper=lod>[n_id] dp;    // Peak Ct drop
   vector<lower=0, upper=wpmax>[n_id] wp;  // Onset-to-peak time
   vector<lower=0, upper=wrmax>[n_id] wr;  // Peak-to-recovery time 

   real<lower=0, upper=sigma_max> sigma;    // Process noise during infection
}

transformed parameters {
   vector[N] process_sd;
   vector[N] mu;
  
  for(i in 1:N){
    mu[i]=mufun(t[i], tp[id[i]], wp[id[i]], wr[id[i]], dp[id[i]]);
    process_sd[i]=sqrt(sigma*sigma);
  };
}

model {

  // Hierarchical priors:
  dpmean ~ normal(dpmean_prior,dpsd_prior) T[0,lod];
  
  wpmean ~ normal(wpmean_prior, wpsd_prior) T[0,wpmax];

  wrmean ~ normal(wrmean_prior, wrsd_prior) T[0,wrmax];

  dpsd ~ normal(0,dpcauchypriorscale) T[0,];
  wpsd ~ normal(0,wpcauchypriorscale) T[0,];
  wrsd ~ normal(0,wrcauchypriorscale) T[0,];  

  sigma ~ normal(0,sigma_prior_scale) T[0,10];

  // Individual parameter specifications:
  tp ~ normal(0,tpsd);
  for(i in 1:n_id){
      dp[i] ~ normal(dpmean, dpsd) T[0, lod];
      wp[i] ~ normal(wpmean, wpsd) T[0, wpmax];
      wr[i] ~ normal(wrmean, wrsd) T[0, wrmax];
  } 

  // Main model specification: 
  for(i in 1:N){

    target += log_sum_exp(
      log1mlambda + normal_lpdf(ydrop[i] | mu[i], process_sd[i]),
      loglambda + exponential_lpdf(ydrop[i] | 1/fpmean));

    if (ydrop[i] < 0 || ydrop[i] > lod)
      target += negative_infinity();
    else
      target += -log_diff_exp(
        log1mlambda + normal_lcdf(lod | mu[i], process_sd[i]),
        log1mlambda + normal_lcdf(0 | mu[i], process_sd[i]));

    }

}

This gives me low E-BFMI (0.01), and a couple of group-level parameters with high R-hat.

I attempted a lognormal non-centred re-parameterization of (dp, wp, and wr)

functions {
  // mufun returns dCt, the delta Ct below the LOD:
  real mufun(real t, real tp, real wp, real wr, real dp){
    // Viral load rises between onset and peak: 
    if(t>(tp-wp) && t<=tp)
      return(dp/wp*(t-(tp-wp)));
    // Viral load falls between peak and recovery: 
    else if(t>tp && t<=(tp+wr))
      return(dp - dp/wr*(t-tp));
    // Ct = lod after recovery: 
    else 
      return(0);
  }
}

data {
   int<lower=0> N;                        // Number of concatenated data points
   int<lower=0> n_id;                     // Number of individuals 
   real<lower=0> lod;                     // Limit of detection
   array[N] int<lower=0> id;             // Vector marking which datum belongs to which id
   array[n_id] int<lower=0> symp;        // Vector marking symptomatic ids
   vector[N] t;                           // Vector marking the time for each data point 
   vector<lower=0, upper=lod>[N] y;       // Concatenated data vector 
   real<lower=0> tpsd;                    // Prior sd for the onset time (days)
   real<lower=0> dpmean_prior;            // Prior mean peak Ct (delta from lod)
   real<lower=0> dpsd_prior;              // Prior sd peak Ct (delta from lod)
   real<lower=0> wpmax;                   // Max proliferation duration
   real<lower=0> wpmean_prior;            // Prior mean proliferation duration
   real<lower=0> wpsd_prior;              // Prior sd proliferation duration
   real<lower=0> wrmax;                   // Prior max clearance duration
   real<lower=0> wrmean_prior;            // Prior mean clearance duration 
   real<lower=0> wrsd_prior;              // Prior sd clearance duration 
   real<lower=0> sigma_max;               // Max allowable value for observation noise
   real<lower=0> sigma_prior_scale;       // Prior observation noise Cauchy scale
   real<lower=0, upper=1> lambda;         // Mixture probability (~1-sensitivity)
   real<lower=0> fpmean;                  // False positive mean Ct
}

transformed data {
   vector<lower=0, upper=lod>[N] ydrop;  // Concatenated deviation from LOD 

   real loglambda;
   real log1mlambda;

   real dpcauchypriorscale;
   real wpcauchypriorscale;
   real wrcauchypriorscale;

   for(i in 1:N){
     ydrop[i] = lod-y[i];
   }

   loglambda = log(lambda);
   log1mlambda = log1m(lambda);

  // Define cauchy prior scales so thatt 90% of the half-distribution mass lies below the max cutoff for that parameter. 
   //dpcauchypriorscale = lod/tan(pi()*(0.95-0.5));
   //wpcauchypriorscale = wpmax/tan(pi()*(0.95-0.5));
   //wrcauchypriorscale = wrmax/tan(pi()*(0.95-0.5));
   
   dpcauchypriorscale = 3;
   wpcauchypriorscale = 3;
   wrcauchypriorscale = 3;

}

parameters {

   vector[n_id] z_id;

   real<lower=0, upper=lod> dpmean;    // Poplation peak Ct drop mean
   real<lower=0, upper=wpmax> wpmean;  // Population onset-to-peak time mean
   real<lower=0, upper=wrmax> wrmean;  // Population peak-to-recovery time mean 

   real<lower=0> dpsd;          // Poplation peak Ct drop sd
   real<lower=0> wpsd;          // Population onset-to-peak time sd
   real<lower=0> wrsd;          // Population peak-to-recovery time sd

   vector[n_id] tp;                        // Peak time

   real<lower=0, upper=sigma_max> sigma;    // Process noise during infection
}

transformed parameters {
   vector[N] process_sd;
   vector[N] mu;
   vector<lower=0>[n_id] dp;    // Peak Ct drop
   vector<lower=0>[n_id] wp;  // Onset-to-peak time
   vector<lower=0>[n_id] wr;  // Peak-to-recovery time 
   
   // Individual parameter specifications:
      dp[id] = exp(log(dpmean) + (log(dpsd) * z_id[id])) ;
      wp[id] = exp(log(wpmean) + (log(wpsd) * z_id[id])) ;
      wr[id] = exp(log(wrmean) + (log(wrsd) * z_id[id])) ;
 
  
  for(i in 1:N){
    mu[i]=mufun(t[i], tp[id[i]], wp[id[i]], wr[id[i]], dp[id[i]]);
    process_sd[i]=sqrt(sigma*sigma);
  };
}

model {

  // Hierarchical priors:
  z_id ~ std_normal();  
  tp ~ normal(0,tpsd);
  
  dpmean ~ normal(dpmean_prior,dpsd_prior) T[0,lod];
  
  wpmean ~ normal(wpmean_prior, wpsd_prior) T[0,wpmax];

  wrmean ~ normal(wrmean_prior, wrsd_prior) T[0,wrmax];

  dpsd ~ normal(0,dpcauchypriorscale) T[0,];
  wpsd ~ normal(0,wpcauchypriorscale) T[0,];
  wrsd ~ normal(0,wrcauchypriorscale) T[0,];  

  sigma ~ normal(0,sigma_prior_scale) T[0,10];

  

  // Main model specification: 
  for(i in 1:N){

    target += log_sum_exp(
      log1mlambda + normal_lpdf(ydrop[i] | mu[i], process_sd[i]),
      loglambda + exponential_lpdf(ydrop[i] | 1/fpmean));

    if (ydrop[i] < 0 || ydrop[i] > lod)
      target += negative_infinity();
    else
      target += -log_diff_exp(
        log1mlambda + normal_lcdf(lod | mu[i], process_sd[i]),
        log1mlambda + normal_lcdf(0 | mu[i], process_sd[i]));

    }

}


This increases E-BFMI, slightly (0.02), but results in low effective draws an high R-hat for most parameters:

Such high values indicate incomplete mixing and biased estimation.
You should consider regularizating your model with additional prior information or a more effective parameterization.

I’m at a loss for what else I can try.