Difficulties in hierarchical fit of nonlinear model

HI all,

I am trying to fit a nonlinear hierarchical model to data consisting of 53 total points in 11 data sets, with 1 to 14 points per set. The datasets include 3 different types of graphite, with the number of data sets per type = 5, 4, and 2, respectively, and the number of data points per type = 22, 10, and 21, respectively.

The model is a semi-empirical nonlinear function with four parameters: Q, lambda, muu, and Eth.

I started with a 2-level model where the population parameters are for all graphite types and the 2nd level consists of distinct parameters for each type of graphite. I initially had issues with divergent transitions, but resolved these with normal priors for the parameters with a non-centered parameterization for the means and weakly informative exponential priors for the sigmas. For some reason uniform distributions, uniform distributions avoiding zero, and non-centered half-Cauchy distributions for the sigmas led to divergences, even with modified adapt_delta and stepsize inputs.

Because the individual datasets are from different laboratories which used different measurement methods, I would like to add a 3rd level to estimate model parameters for each dataset. A non-centered parameterization led to lots of divergences (1400 out of 20000 iterations over 4 chains), even with aggressive adapt_delta and stepsize values. I was able to reduce the divergences by about an order of magnitude by fixing 2 of the four model parameters (lambda and muu) at the top-level population values, but the fits are not particularly good; these parameters really need to vary with graphite type.

I’m attaching the data, the stan model, and some pairs plots. The sigmas result in problematic pointy funnels, but I’m not sure why non-centering doesn’t resolve this. Any help is appreciated!
fitdata.R (2.3 KB)

// Eckstein model function
functions{
  real ecksteinMean(real aE, real aQ, real alambda, real amuu, real aEth) {
    real Zi = 54.0; // Atomic number of projectile
    real Zs = 6.0;		// Atomic number of target
    real Mi = 131.3;		// Atomic mass of projectile
    real Ms = 12.0;		// Atomic mass of target
    real ao = 0.529;  // Bohr radius in angstroms
    real epso = 1.42e-40;  // Permittivity of free space in C2/eV/Å
    real echg = 1.602e-19;		// electron charge in C
	  
    real aL;
    real eps;
    real ww;
    real snkc;
    real lnyield;

     // Lindhard screening length in angstroms
     aL = (9.0 * pi() ^ 2.0 / 128.0) ^ (1.0/3.0) * ao * (Zi ^ (2.0/3.0) + Zs ^ (2.0/3.0)) ^ (-1.0/2.0);
    // Reduced energy
    eps = aL / (Zi * Zs) * (4.0 * pi() * epso / echg ^ 2 ) * (Ms / (Mi + Ms)) * aE;
    // Eckstein yield formula parameter
    ww = eps + 0.1728 * sqrt(eps) + 0.008 * eps ^ (0.1504);
     // Reduced nuclear stopping power based on K-C potential
     snkc = 0.5 * log(1.0 + 1.2288 * eps) / ww;
    // ln(yield) function:
     lnyield = log(aQ * snkc * (aE / aEth - 1.0) ^ amuu / (alambda + (aE / aEth - 1.0) ^ amuu));
    return(lnyield);
  }
}

data {
  int<lower = 1> Ns; // Number of samples
  int<lower = 1> Nds; // Number of datasets 
  int<lower = 1> Nt;  // Number of graphite types 
  vector[Ns] lnY;  // log(Sputter yield)
  vector[Ns] E;     // incident ion energy
  real EthPriorSig;  // SD for Eth prior dist
  int type[Nds];  // Integer representing the type of graphite  that an individual dataset belongs to 
  int dataset[Ns];  // Integer representing the dataset that an individual measurement belongs to
  int<lower = 1> N_test;
  vector[N_test] E_test;
}

parameters {
  // Hyperparameters (non-centered) for top level (all graphite types) distributions
  real<lower=0> mu_Q;   // Average value of Q for groups
  real<lower=0> mu_lambda; 
  real<lower=0> mu_muu; 
  real<lower=0> mu_Eth; 
  vector[Nt] Q_raw;     // Normalized Q distribution (for non-centered reparameterization)
  vector[Nt] lambda_raw; 
  vector[Nt] muu_raw; 
  vector[Nt] Eth_raw; 
  real<lower = 0> sigma_Q;    // Hyperparameters for sigmas
  real<lower = 0> sigma_lambda; 
  real<lower = 0> sigma_muu; 
  real<lower = 0> sigma_Eth; 
  // Hyperparameters for second level means
  vector[Nds] Qt_raw; 
  vector[Nds] lambdat_raw; 
  vector[Nds] muut_raw; 
  vector[Nds] Etht_raw; 
  real<lower = 0> sigma_Qt;    // Hyperparameters for type sigmas
  real<lower = 0> sigma_lambdat; 
  real<lower = 0> sigma_muut; 
  real<lower=0> sigma_Etht; 
  real<lower=-pi()/2, upper=pi()/2> sigma_unif;
}
transformed parameters {    // non-centered reparameterization
  // Parameters for second level (individual graphite types) distributions
  vector<lower=0>[Nt] mu_Qt;   
  vector<lower=0>[Nt] mu_lambdat; 
  vector<lower=0>[Nt] mu_muut; 
  vector<lower=0>[Nt] mu_Etht; 
  // Parameters for third level (individual datasets) distributions
  vector<lower=0>[Nds] Q;   
  vector<lower=0>[Nds] lambda; 
  vector<lower=0>[Nds] muu; 
  vector<lower=0>[Nds] Eth;  
  real<lower=0> sigma;
  // Reparameterized normal distributions (mu_Qt's in terms of top level distributions):
  mu_Qt = mu_Q + Q_raw * sigma_Q;  // implies Qt[i] ~ normal(mu_Q, sigma_Q)
  mu_lambdat = mu_lambda + lambda_raw * sigma_lambda;
  mu_muut = mu_muu + muu_raw * sigma_muu;
  mu_Etht = mu_Eth + Eth_raw * sigma_Eth;
  // Reparameterized normal distributions (Qs, etc. in terms of graphite type distributions):
  for (j in 1:Nds) {
    Q[j] = mu_Qt[type[j]] + Qt_raw[j] * sigma_Qt; // ~ normal(mu_Qt[type[j]],sigma_Qt);
    lambda[j] = mu_lambdat[type[j]] + lambdat_raw[j] * sigma_lambdat;
    muu[j] = mu_muut[type[j]] + muut_raw[j] * sigma_muut;
    Eth[j] = mu_Etht[type[j]] + Etht_raw[j] * sigma_Etht;
  }
  // Reparameterized cauchy distribution for sigma at dataset level
  sigma = 5 * tan(sigma_unif);  // sigma ~ cauchy(0,5) 
}
model {
  // Hyperpriors--parameters that define graphite non-centered population distribution 
  // (i.e., type parameters are sampled from these)
  mu_Q ~ normal(10,5); 
  mu_lambda ~ normal(50,25); 
  mu_muu ~ normal(2,1);
  mu_Eth ~ normal(36.5,EthPriorSig); 
  Q_raw ~ normal(0,1);
  lambda_raw ~ normal(0,1);  
  muu_raw ~ normal(0,1);
  Eth_raw ~ normal(0,1);
  sigma_Q ~ exponential(0.1); 
  sigma_lambda ~ exponential(0.02);  
  sigma_muu ~ exponential(1); 
  sigma_Eth ~ exponential(0.05); 
  // Hyperpriors--parameters that define graphite type means
  Qt_raw ~ normal(0,1);
  lambdat_raw ~ normal(0,1);
  muut_raw ~ normal(0,1);
  Etht_raw ~ normal(0,1);
  // Hyperpriors for graphite type sigmas
  sigma_Qt ~ exponential(0.1); 
  sigma_lambdat ~ exponential(0.02); 
  sigma_muut ~ exponential(1);  
  sigma_Etht ~ exponential(0.05); 
  // Prior
  sigma_unif ~ uniform(-pi()/2, pi()/2);
  // Likelihood
  for (i in 1:Ns) {
    lnY[i] ~ normal(ecksteinMean(E[i],Q[dataset[i]],lambda[dataset[i]],muu[dataset[i]],Eth[dataset[i]]), sigma);
  }
}

// Generate predicted values for PPC
generated quantities {
  real lny_pred[Nt,N_test];
  real lny_rep[Ns];
  real ln_sigma_Q;
  real ln_sigma_lambda;
  real ln_sigma_muu;
  real ln_sigma_Eth;
  real ln_sigma;
  ln_sigma_Q = log(sigma_Q);
  ln_sigma_lambda = log(sigma_lambda);
  ln_sigma_muu = log(sigma_muu);
  ln_sigma_Eth = log(sigma_Eth);
  ln_sigma = log(sigma);
  for (i in 1:Ns) {   
    lny_rep[i] = normal_rng(ecksteinMean(E[i],Q[dataset[i]],lambda[dataset[i]],muu[dataset[i]],Eth[dataset[i]]), sigma);
  }
  for (j in 1:Nt) {
    for (i in 1:N_test){ 
      if (E_test[i] < mu_Etht[j])
        lny_pred[j,i] = -16;
      else 
        lny_pred[j,i] = normal_rng(ecksteinMean(E_test[i],mu_Qt[j],mu_lambdat[j],mu_muut[j],mu_Etht[j]), sigma);
    }
  }
}