Issues with non-centered specification (Matt trick)

Hi Folks,

I know this issue has been discussed time and again, but after over a week of reading online posts and trying this without success, here it comes.

I have problems implementing a non-centered specification of the variance in a 4 level hierarchical model. I have several observations (measurements), nested within individual-time means, nested within individual means, nested within district (“woreda”) means. The centered version is OK in the model empty of co-variates, but lowish n_eff (about 10% of samples) in short testing version (1000 warmeup + 200 samples on four chains), which decreases to about 7% in longer version. I am also afraid what might happen once the model becomes heavier unless I solve this.

The issue is that once I implement a non-centred version, the problem actually gets worse. In particular, n_eff results halved if I use the non-centered version shown below. I have also tried one including the means in the transformed parameters block–see third model below–but that results in equally bad performance, plus it takes about four times as long to converge (over 12 hours instead of 4 for the other two!) Hence my questions:

  1. is there something fundamentally wrong with the way I have written my model, or are there instances where using a non-centered specification can actually be bad?

  2. If I ever get the non-centered spec to work, would it make sense to implement this also for the heteroscedastic residual variance? And how can I best avoid that sigma dips into negative territory in that case?

Any suggestions would be highly appreciated!

Ferdinand

Here is the centered version:

data{
    int<lower=1> N;
    int<lower=1> N_id_year;
    int<lower=1> N_id;
    int<lower=1> N_woreda;
    vector[N] rt;
    int woreda[N];
    int id[N];
    int id_year[N];
}
parameters{
    real<lower=0> tau;
    vector[N_woreda] rt_w;
    vector[N_id] rt_s;
    vector[N_id_year] rt_t;
    real<lower=0,upper=1> a;
    vector[N_id] tau_s;
    vector[N_woreda] tau_w;
    real<lower=0> sigma_w;
    real<lower=0> sigma_s;
    real<lower=0> sigma_t;
    real<lower=0> sigma_tau_w;
    real<lower=0> sigma_tau_s;
}
model{
    vector[N] sigma;
    vector[N] mu;
    tau ~ cauchy( 0.25 , 0.25 );
    sigma_tau_s ~ cauchy( 0.25 , 0.25 );
    sigma_tau_w ~ cauchy( 0.25 , 0.25 );
    sigma_w ~ cauchy( 0.25 , 0.25 );
    sigma_s ~ cauchy( 0.25 , 0.25 );
    sigma_t ~ cauchy( 0.25 , 0.25 );
    tau_w ~ normal( 0 , sigma_tau_w );
    tau_s ~ normal( 0 , sigma_tau_s );
    rt_w ~ normal( 0 , sigma_w );
    rt_s ~ normal( 0 , sigma_s );
    rt_t ~ normal( 0 , sigma_t );
    a ~ normal( 0.5 , 0.25 );
    sigma = tau + tau_s[id] + tau_w[woreda];
    mu = a + rt_w[woreda] + rt_s[id] + rt_t[id_year];
    rt ~ normal( mu , sigma );
}
generated quantities{
    vector[N] sigma;
    vector[N] mu;
    real dev;
    dev = 0;
    sigma = tau + tau_w[woreda] + tau_s[id];
    mu = a + rt_w[woreda] + rt_s[id] + rt_t[id_year];
    dev = dev + (-2)*normal_lpdf( rt | mu , sigma );
}

And here the non-centered one:

data{
    int<lower=1> N;
    int<lower=1> N_id_year;
    int<lower=1> N_id;
    int<lower=1> N_woreda;
    vector[N] rt;
    int woreda[N];
    int id[N];
    int id_year[N];
}
parameters{
    real<lower=0> tau;
    vector[N_woreda] rt_w_tilde;
    vector[N_id] rt_s_tilde;
    vector[N_id_year] rt_t_tilde;
    real<lower=0,upper=1> a;
    vector[N_id] tau_s;
    vector[N_woreda] tau_w;
    real<lower=0> sigma_w;
    real<lower=0> sigma_s;
    real<lower=0> sigma_t;
    real<lower=0> sigma_tau_w;
    real<lower=0> sigma_tau_s;
}
transformed parameters{
    vector[N_woreda] rt_w;
    vector[N_id] rt_s;
    vector[N_id_year] rt_t;
    rt_w = sigma_w*rt_w_tilde;
    rt_s = sigma_s*rt_s_tilde;
    rt_t = sigma_t*rt_t_tilde;
}
model{
    vector[N] sigma;
    vector[N] mu;
    tau ~ cauchy( 0.25 , 0.25 );
    sigma_tau_s ~ cauchy( 0.25 , 0.25 );
    sigma_tau_w ~ cauchy( 0.25 , 0.25 );
    sigma_w ~ cauchy( 0.25 , 0.25 );
    sigma_s ~ cauchy( 0.25 , 0.25 );
    sigma_t ~ cauchy( 0.25 , 0.25 );
    tau_w ~ normal( 0 , sigma_tau_w );
    tau_s ~ normal( 0 , sigma_tau_s );
    rt_w_tilde ~ normal( 0 , 1 );
    rt_s_tilde ~ normal( 0 , 1 );
    rt_t_tilde ~ normal( 0 , 1 );
    a ~ normal( 0.5 , 0.25 );
    sigma = tau + tau_s[id] + tau_w[woreda];
    mu = a + rt_w[woreda] + rt_s[id] + rt_t[id_year];
    rt ~ normal( mu , sigma );
}
generated quantities{
    vector[N] sigma;
    vector[N] mu;
    real dev;
    dev = 0;
    sigma = tau + tau_w[woreda] + tau_s[id];
    mu = a + rt_w[woreda] + rt_s[id] + rt_t[id_year];
    dev = dev + (-2)*normal_lpdf( rt | mu , sigma );
}

Finally, trying to get the means into the transformed parameters block:

data{
    int<lower=1> N;
    int<lower=1> N_id_year;
    int<lower=1> N_id;
    int<lower=1> N_woreda;
    vector[N] rt;
    int woreda[N];
    int id[N];
    int id_year[N];
}
parameters{
    real<lower=0> tau;
    vector[N_woreda] rt_w_tilde;
    vector[N_id] rt_s_tilde;
    vector[N_id_year] rt_t_tilde;
    real<lower=0,upper=1> a;
    vector[N_id] tau_s;
    vector[N_woreda] tau_w;
    real<lower=0> sigma_w;
    real<lower=0> sigma_s;
    real<lower=0> sigma_t;
    real<lower=0> sigma_tau_w;
    real<lower=0> sigma_tau_s;
}
transformed parameters{
    vector[N_woreda] rt_w;
    vector[N_id] rt_s;
    vector[N_id_year] rt_t;
    rt_w[woreda] = a + sigma_w*rt_w_tilde[woreda];
    rt_s[id] = rt_w[woreda] + sigma_s*rt_s_tilde[id];
    rt_t[id_year] = rt_s[id] + sigma_t*rt_t_tilde[id_year];
}
model{
    vector[N] sigma;
    tau ~ cauchy( 0.25 , 0.25 );
    sigma_tau_s ~ cauchy( 0 , 0.25 );
    sigma_tau_w ~ cauchy( 0 , 0.25 );
    sigma_w ~ cauchy( 0.25 , 0.25 );
    sigma_s ~ cauchy( 0.25 , 0.25 );
    sigma_t ~ cauchy( 0.25 , 0.25 );
    tau_w ~ normal( 0 , sigma_tau_w );
    tau_s ~ normal( 0 , sigma_tau_s );
    rt_w_tilde ~ normal( 0 , 1 );
    rt_s_tilde ~ normal( 0 , 1 );
    rt_t_tilde ~ normal( 0 , 1 );
    a ~ normal( 0.5 , 0.25 );
    sigma = tau + tau_s[id] + tau_w[woreda];
    rt ~ normal( rt_t[id_year] , sigma );
}
generated quantities{
    vector[N] sigma;
    real dev;
    dev = 0;
    sigma = tau + tau_w[woreda] + tau_s[id];
    dev = dev + (-2)*normal_lpdf( rt | rt_t[id_year] , sigma );
}