# Is there a need to have a population-level intercept when you already have random intercepts?

In my random-intercept model, I’m debating getting rid of the population-level intercept.

So instead of having something like:

```stan
data {
  int<lower=1> N;                   // number of data points
  real Y[N];                        // dependent variable
  int<lower=1> Ng;                  // number of groups
  int<lower=1, upper=Ng> grpID[N];  // group lookup
  real x1[N];                       // predictor
}
parameters {
  // Population
  real beta_0;                      // intercept
  real beta_1;
  real<lower=0> sigma;
  // Group NCP
  real grp_mu_raw[Ng];
  real grp_mu_bar;
  real<lower=0> grp_mu_sigma;
}
transformed parameters {
  real grp_mu[Ng];                  // intercept for each group
  for (i in 1:Ng) {
    grp_mu[i] = grp_mu_bar + grp_mu_raw[i] * grp_mu_sigma;
  }
}
model {
  // priors
  beta_0 ~ normal(3, 3);
  beta_1 ~ normal(0, 1);
  sigma ~ normal(0, 3);
  grp_mu_bar ~ normal(0, 1);
  grp_mu_raw ~ normal(0, 1);
  grp_mu_sigma ~ normal(0, 1);
  // likelihood
  real mu[N];
  for (i in 1:N) {
    mu[i] = beta_0 + grp_mu[grpID[i]] + beta_1 * x1[i];
  }
  Y ~ normal(mu, sigma);
}
```

changing it to:

```stan
data {
  int<lower=1> N;                   // number of data points
  real Y[N];                        // dependent variable
  int<lower=1> Ng;                  // number of groups
  int<lower=1, upper=Ng> grpID[N];  // group lookup
  real x1[N];                       // predictor
}
parameters {
  // Population
  real beta_1;
  real<lower=0> sigma;
  // Group NCP
  real grp_mu_raw[Ng];
  real grp_mu_bar;
  real<lower=0> grp_mu_sigma;
}
transformed parameters {
  real grp_mu[Ng];                  // intercept for each group
  for (i in 1:Ng) {
    grp_mu[i] = grp_mu_bar + grp_mu_raw[i] * grp_mu_sigma;
  }
}
model {
  // priors
  beta_1 ~ normal(0, 1);
  sigma ~ normal(0, 3);
  grp_mu_bar ~ normal(3, 3);
  grp_mu_raw ~ normal(0, 1);
  grp_mu_sigma ~ normal(0, 1);
  // likelihood
  real mu[N];
  for (i in 1:N) {
    mu[i] = grp_mu[grpID[i]] + beta_1 * x1[i];
  }
  Y ~ normal(mu, sigma);
}
```

Here the population-level intercept has been dropped, and the prior on `grp_mu_bar` has been changed from normal(0, 1) to normal(3, 3) so that it takes over the role that `beta_0` and its prior used to play.

Are there trade-offs to the different approaches? Or is one strictly better than the other?

I’ve tried running both models, and the latter initially seems to converge better, perhaps because it removes a redundant parameter, `beta_0`, which is strongly correlated with `grp_mu_bar` in the posterior.


If you make what formerly was the population intercept into the mean of the group-specific intercepts, then the posterior distribution of the other parameters is not affected. But you are in the territory of centered vs. non-centered parameterizations, which can make a substantial difference to the efficiency of NUTS. Removing the population intercept altogether is basically equivalent to a centered parameterization with a point-mass prior on zero.
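For concreteness, here is a sketch of what that centred parameterization of the group intercepts would look like (same posterior as the non-centred model above, but a different geometry for NUTS to explore):

```stan
// Centred parameterization (sketch): sample each group intercept
// directly from its hierarchical prior, instead of scaling and
// shifting a standardized raw vector.
parameters {
  real grp_mu_bar;
  real<lower=0> grp_mu_sigma;
  real grp_mu[Ng];                          // group intercepts, sampled directly
}
model {
  grp_mu ~ normal(grp_mu_bar, grp_mu_sigma);  // hierarchical prior
  // ... remaining priors and likelihood as before
}
```

Which of the two parameterizations samples more efficiently typically depends on how strongly the data inform the group-level parameters.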


Thanks Ben, in such a model can I therefore do away with `grp_mu_bar`? It feels like any value it could take would be ‘taken care of’ by the population-level intercept. I don’t see how dropping it would adversely affect NUTS; if anything, I feel like I’m helping by getting rid of a superfluous parameter.
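That is, keeping `beta_0` and centring the raw group effects at zero, something like this sketch:

```stan
// Sketch of what I mean: keep beta_0 as the population-level
// intercept and drop grp_mu_bar, so the group effects are
// deviations centred at zero.
transformed parameters {
  real grp_mu[Ng];
  for (i in 1:Ng) {
    grp_mu[i] = grp_mu_raw[i] * grp_mu_sigma;
  }
}
```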

As you can probably tell, I’m still not clear on the whys and whens of non-centred parametrisation.

Have you looked at this paper? It might help with understanding those whys and whens. I don’t think it covers the plot twist that the non-centred parametrisation’s performance deteriorates with lots of data, but others here on the forum can point you to good sources for that.


Thanks for the tip, hadn’t seen it before and will give it a read.