HI all,
I am trying to fit a nonlinear hierarchical model to data consisting of 53 total points in 11 data sets, with 1 to 14 points per set. The datasets include 3 different types of graphite, with the number of data sets per type = 5, 4, and 2, respectively, and the number of data points per type = 22, 10, and 21, respectively.
The model is a semi-empirical nonlinear function with four parameters: Q, lambda, muu, and Eth.
I started with a 2-level model where the population parameters are for all graphite types and the 2nd level consists of distinct parameters for each type of graphite. I initially had issues with divergent transitions, but resolved these with normal priors for the parameters with a non-centered parameterization for the means and weakly informative exponential priors for the sigmas. For some reason uniform distributions, uniform distributions avoiding zero, and non-centered half-Cauchy distributions for the sigmas led to divergences, even with modified adapt_delta and stepsize inputs.
Because the individual datasets are from different laboratories which used different measurement methods, I would like to add a 3rd level to estimate model parameters for each dataset. A non-centered parameterization led to lots of divergences (1400 out of 20000 iterations over 4 chains), even with aggressive adapt_delta and stepsize values. I was able to reduce the divergences by about an order of magnitude by fixing 2 of the four model parameters (lambda and muu) at the top-level population values, but the fits are not particularly good; these parameters really need to vary with graphite type.
I’m attaching the data, the stan model, and some pairs plots. The sigmas result in problematic pointy funnels, but I’m not sure why non-centering doesn’t resolve this. Any help is appreciated!
fitdata.R (2.3 KB)
// Eckstein model function
functions{
real ecksteinMean(real aE, real aQ, real alambda, real amuu, real aEth) {
real Zi = 54.0; // Atomic number of projectile
real Zs = 6.0; // Atomic number of target
real Mi = 131.3; // Atomic mass of projectile
real Ms = 12.0; // Atomic mass of target
real ao = 0.529; // Bohr radius in angstroms
real epso = 1.42e-40; // Permittivity of free space in C2/eV/Å
real echg = 1.602e-19; // electron charge in C
real aL;
real eps;
real ww;
real snkc;
real lnyield;
// Lindhard screening length in angstroms
aL = (9.0 * pi() ^ 2.0 / 128.0) ^ (1.0/3.0) * ao * (Zi ^ (2.0/3.0) + Zs ^ (2.0/3.0)) ^ (-1.0/2.0);
// Reduced energy
eps = aL / (Zi * Zs) * (4.0 * pi() * epso / echg ^ 2 ) * (Ms / (Mi + Ms)) * aE;
// Eckstein yield formula parameter
ww = eps + 0.1728 * sqrt(eps) + 0.008 * eps ^ (0.1504);
// Reduced nuclear stopping power based on K-C potential
snkc = 0.5 * log(1.0 + 1.2288 * eps) / ww;
// ln(yield) function:
lnyield = log(aQ * snkc * (aE / aEth - 1.0) ^ amuu / (alambda + (aE / aEth - 1.0) ^ amuu));
return(lnyield);
}
}
data {
int<lower = 1> Ns; // Number of samples
int<lower = 1> Nds; // Number of datasets
int<lower = 1> Nt; // Number of graphite types
vector[Ns] lnY; // log(Sputter yield)
vector[Ns] E; // incident ion energy
real EthPriorSig; // SD for Eth prior dist
int type[Nds]; // Integer representing the type of graphite that an individual dataset belongs to
int dataset[Ns]; // Integer representing the dataset that an individual measurement belongs to
int<lower = 1> N_test;
vector[N_test] E_test;
}
parameters {
// Hyperparameters (non-centered) for top level (all graphite types) distributions
real<lower=0> mu_Q; // Average value of Q for groups
real<lower=0> mu_lambda;
real<lower=0> mu_muu;
real<lower=0> mu_Eth;
vector[Nt] Q_raw; // Normalized Q distribution (for non-centered reparameterization)
vector[Nt] lambda_raw;
vector[Nt] muu_raw;
vector[Nt] Eth_raw;
real<lower = 0> sigma_Q; // Hyperparameters for sigmas
real<lower = 0> sigma_lambda;
real<lower = 0> sigma_muu;
real<lower = 0> sigma_Eth;
// Hyperparameters for second level means
vector[Nds] Qt_raw;
vector[Nds] lambdat_raw;
vector[Nds] muut_raw;
vector[Nds] Etht_raw;
real<lower = 0> sigma_Qt; // Hyperparameters for type sigmas
real<lower = 0> sigma_lambdat;
real<lower = 0> sigma_muut;
real<lower=0> sigma_Etht;
real<lower=-pi()/2, upper=pi()/2> sigma_unif;
}
transformed parameters { // non-centered reparameterization
// Parameters for second level (individual graphite types) distributions
vector<lower=0>[Nt] mu_Qt;
vector<lower=0>[Nt] mu_lambdat;
vector<lower=0>[Nt] mu_muut;
vector<lower=0>[Nt] mu_Etht;
// Parameters for third level (individual datasets) distributions
vector<lower=0>[Nds] Q;
vector<lower=0>[Nds] lambda;
vector<lower=0>[Nds] muu;
vector<lower=0>[Nds] Eth;
real<lower=0> sigma;
// Reparameterized normal distributions (mu_Qt's in terms of top level distributions):
mu_Qt = mu_Q + Q_raw * sigma_Q; // implies Qt[i] ~ normal(mu_Q, sigma_Q)
mu_lambdat = mu_lambda + lambda_raw * sigma_lambda;
mu_muut = mu_muu + muu_raw * sigma_muu;
mu_Etht = mu_Eth + Eth_raw * sigma_Eth;
// Reparameterized normal distributions (Qs, etc. in terms of graphite type distributions):
for (j in 1:Nds) {
Q[j] = mu_Qt[type[j]] + Qt_raw[j] * sigma_Qt; // ~ normal(mu_Qt[type[j]],sigma_Qt);
lambda[j] = mu_lambdat[type[j]] + lambdat_raw[j] * sigma_lambdat;
muu[j] = mu_muut[type[j]] + muut_raw[j] * sigma_muut;
Eth[j] = mu_Etht[type[j]] + Etht_raw[j] * sigma_Etht;
}
// Reparameterized cauchy distribution for sigma at dataset level
sigma = 5 * tan(sigma_unif); // sigma ~ cauchy(0,5)
}
model {
// Hyperpriors--parameters that define graphite non-centered population distribution
// (i.e., type parameters are sampled from these)
mu_Q ~ normal(10,5);
mu_lambda ~ normal(50,25);
mu_muu ~ normal(2,1);
mu_Eth ~ normal(36.5,EthPriorSig);
Q_raw ~ normal(0,1);
lambda_raw ~ normal(0,1);
muu_raw ~ normal(0,1);
Eth_raw ~ normal(0,1);
sigma_Q ~ exponential(0.1);
sigma_lambda ~ exponential(0.02);
sigma_muu ~ exponential(1);
sigma_Eth ~ exponential(0.05);
// Hyperpriors--parameters that define graphite type means
Qt_raw ~ normal(0,1);
lambdat_raw ~ normal(0,1);
muut_raw ~ normal(0,1);
Etht_raw ~ normal(0,1);
// Hyperpriors for graphite type sigmas
sigma_Qt ~ exponential(0.1);
sigma_lambdat ~ exponential(0.02);
sigma_muut ~ exponential(1);
sigma_Etht ~ exponential(0.05);
// Prior
sigma_unif ~ uniform(-pi()/2, pi()/2);
// Likelihood
for (i in 1:Ns) {
lnY[i] ~ normal(ecksteinMean(E[i],Q[dataset[i]],lambda[dataset[i]],muu[dataset[i]],Eth[dataset[i]]), sigma);
}
}
// Generate predicted values for PPC
generated quantities {
real lny_pred[Nt,N_test];
real lny_rep[Ns];
real ln_sigma_Q;
real ln_sigma_lambda;
real ln_sigma_muu;
real ln_sigma_Eth;
real ln_sigma;
ln_sigma_Q = log(sigma_Q);
ln_sigma_lambda = log(sigma_lambda);
ln_sigma_muu = log(sigma_muu);
ln_sigma_Eth = log(sigma_Eth);
ln_sigma = log(sigma);
for (i in 1:Ns) {
lny_rep[i] = normal_rng(ecksteinMean(E[i],Q[dataset[i]],lambda[dataset[i]],muu[dataset[i]],Eth[dataset[i]]), sigma);
}
for (j in 1:Nt) {
for (i in 1:N_test){
if (E_test[i] < mu_Etht[j])
lny_pred[j,i] = -16;
else
lny_pred[j,i] = normal_rng(ecksteinMean(E_test[i],mu_Qt[j],mu_lambdat[j],mu_muut[j],mu_Etht[j]), sigma);
}
}
}