Hi, I’m running Heckman selection models inspired by the awesome work of @rtrangucci and @RachaelMeager.
I’m modeling four decisions wiht two Heckman models (one binary selection decision, and one continous amount decision; in each Heckman model respectively). I started running the two Heckman models with separate files and everything converged well. Now I created one joint Stan file with the two Heckman models to be able to correlate the individual-specific intercepts of all four equations. I now have an identification issue with two parameters, while I achieve good convergence for other parameters.
One interesting thing I note is that the traceplot of…
- rho1, that is, the correlation between the selection and amount equation of the first Heckman model, and
- alpha0[1], that is, the average intercept of the selection equation of the first Heckman model
…behave similarly, but mirror-inverted.
Here are the traceplots:
Putting rho1 and alpha0[1] next to each other and mirror-inverting rho1:
And some more traceplots that mostly look okay (I hope):
b1.pdf (101.0 KB)
b2.pdf (100.8 KB)
b3.pdf (70.7 KB)
b4.pdf (71.0 KB)
rho2.pdf (16.6 KB)
And here is the model:
data {
int<lower = 1> N_1; // Overall number of obvservations in the first Heckman
int<lower = 1> N_2; // Overall number of obvservations in the second Heckman
int<lower = 1> N_neg1; // Number of observations in the first Heckman model that are censored
int<lower = 1> N_pos1; // Number of observations in the first Heckman model that are non-censored
int<lower = 1> N_neg2; // Number of observations in the second Heckman model that are censored
int<lower = 1> N_pos2; // Number of observations in the second Heckman model that are non-censored
int<lower = 1> K1_1; // number of covariates in selection and amount stage, respectively
int<lower = 1> K1_2; // number of covariates in selection and amount stage, respectively
int<lower = 1> K2; // number of weekday dummies
int<lower = 1> H; // number of individuals
int<lower = 1> J; // number of outcomes
int<lower = 1> id_1[N_1]; // identifier of individuals in the first Heckman
int<lower = 1> id_2[N_2]; // identifier of individuals in the second Heckman
vector[N_pos1] y1; // amount in the first Heckman
int<lower = 0, upper = 1> y2[N_1]; // selection indicator in the first Heckman model
vector[N_pos2] y3; // amount in the second Heckman
int<lower = 0, upper = 1> y4[N_2]; // selection indicator in the second Heckman model
matrix[N_pos1, K1_1] X1_1;
matrix[N_1, K1_1] X2_1;
matrix[N_pos2, K1_2] X1_2;
matrix[N_2, K1_2] X2_2;
matrix[N_pos1, K2] WD1_1;
matrix[N_1, K2] WD2_1;
matrix[N_pos2, K2] WD1_2;
matrix[N_2, K2] WD2_2;
}
transformed data {
int<lower=1,upper=N_1> n_pos1[N_pos1];
int<lower=0,upper=N_1> n_neg1[N_neg1];
int<lower=1,upper=N_2> n_pos2[N_pos2];
int<lower=0,upper=N_2> n_neg2[N_neg2];
{
int i;
int j;
int k;
int l;
i = 1;
j = 1;
k = 1;
l = 1;
for (n in 1:N_1) {
if (y2[n] == 1) {
n_pos1[i] = n;
i = i + 1;
} if (y2[n] == 0) {
n_neg1[j] = n;
j = j + 1;
}
}
for (n in 1:N_2) {
if (y4[n] == 1) {
n_pos2[k] = n;
k = k + 1;
} if (y4[n] == 0) {
n_neg2[l] = n;
l = l + 1;
}
}
}
}
parameters {
real<lower=-1, upper=1> rho1; // correlation in the first Heckman model
real<lower=-1, upper=1> rho2; // correlation in the second Heckman model
vector[K1_1] b1;
vector[K1_1] b2;
vector[K1_2] b3;
vector[K1_2] b4;
vector[J] alpha0;
vector[J] alpha_raw[H];
cholesky_factor_corr[J] L_Omega_alpha;
vector<lower=0>[J] sigma_alpha;
real<lower=0> sd1;
real<lower=0> sd2;
}
transformed parameters {
vector[J] alpha[H];
for (i in 1:H){
alpha[i] = alpha0 + diag_pre_multiply(sigma_alpha, L_Omega_alpha) * alpha_raw[i];
}
}
model {
vector[N_1] mu_y2;
vector[N_pos1] mu_y1;
vector[N_2] mu_y4;
vector[N_pos2] mu_y3;
b1 ~ normal(0, 1);
b2 ~ normal(0, 1);
b3 ~ normal(0, 1);
b4 ~ normal(0, 1);
sd1 ~ normal(0, 1);
sd2 ~ normal(0, 1);
alpha0 ~ cauchy(0, 1);
L_Omega_alpha ~ lkj_corr_cholesky(1);
for(i in 1:H){
alpha_raw[i] ~ std_normal();
}
mu_y2 = X2_1 * b2;
mu_y1 = X1_1 * b1;
mu_y4 = X2_2 * b4;
mu_y3 = X1_2 * b3;
// Here starts the first Heckman model
for (n in 1:N_neg1) {
target += (log(Phi(-mu_y2[n_neg1[n]] - alpha[id_1[n_neg1[n]], 2])));
}
for (n in 1:N_pos1) {
target += log(Phi(sqrt(1 - rho1^2)^(-1)*(mu_y2[n_pos1[n]] + alpha[id_1[n_pos1[n]], 2]+
(rho1 / sd1)
* (y1[n] - mu_y1[n] - alpha[id_1[n_pos1[n]], 1]))));
y1[n] ~ normal(mu_y1[n]+ alpha[id_1[n_pos1[n]], 1], sd1);
}
// Here starts the second Heckman model
// if(y2[t] == 1){
for (n in 1:N_neg2) {
target += (log(Phi(-mu_y4[n_neg2[n]] - alpha[id_2[n_neg2[n]], 4])));
}
for (n in 1:N_pos2) {
target += log(Phi(sqrt(1 - rho2^2)^(-1)*(mu_y4[n_pos2[n]] + alpha[id_2[n_pos2[n]], 4]+
(rho2 / sd2)
* (y3[n] - mu_y3[n] - alpha[id_2[n_pos2[n]], 3]))));
y3[n] ~ normal(mu_y3[n]+ alpha[id_2[n_pos2[n]], 3], sd2);
// }
}
}
Any ideas what’s going on?
I already applied the non-centered parameterization on the correlated individual-specific intercepts, which at least improved convergence a bit.
I highly appreciate any comments!