Hello!
Are there any examples on “arellano/bonhomme selection correction” implementation within Stan out there?
I don’t know of any and I assume you already tried Googling it.
If there’s a density written down somewhere for it, it should be straightforward to translate it into Stan if it uses operations that are defined in Stan.
1 Like
This is the paper (https://www.econstor.eu/bitstream/10419/130088/1/844765139.pdf)
There is an implemetation in Stata (https://www.stata.com/meeting/switzerland20/slides/Switzerland20_Biewen.pdf)
I tried this Stan code. Maybe some pro can improve it.
data {
int<lower=0> N; // Number of observations
int<lower=0> K1, K2; // Number of predictors
vector[N] y; // Outcome variable
matrix[N, K1] X; // Predictors for quantile regression
matrix[N, K2] Z; // Predictors for selection equation
array[N] int<lower=0, upper=1> S; // Selection indicator
real<lower=0, upper=1> q; // Quantile of interest
}
parameters {
vector[K1] beta; // Coefficients for quantile regression
vector[K2] gamma; // Coefficients for selection equation
real<lower=0> sigma;
real alpha, alpha_s;
real<lower=-1, upper=1> rho;
}
transformed parameters {
vector[N] theta = alpha_s + Z * gamma;
vector[N] lambda = alpha + X * beta;
}
model {
target += student_t_lpdf(alpha | 3, 0, 2.5);
target += student_t_lpdf(alpha_s | 3, 0, 2.5);
target += student_t_lpdf(sigma | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
target += student_t_lpdf(rho | 3, 0, 2.5);
for (n in 1:N) {
if (S[n] == 1) {
real phi_inv_latent = (y[n] - lambda[n]) / sigma;
target += normal_lpdf(phi_inv_latent | 0, 1)
+ normal_lcdf((theta[n] - rho * phi_inv_latent) / sqrt(1 - rho^2) | 0, 1);
} else {
target += normal_lcdf(-theta[n] | 0, 1);
}
}
for (n in 1:N) {
real u = (y[n] - lambda[n]) / sigma;
if (u >= 0) {
target += log(q) - log(sigma);
} else {
target += log(1-q) - log(sigma);
}
}
}
This is the R code:
data <- read_csv("arhomesample.csv")
data <- data %>%
mutate(
select = ifelse(!is.na(wage), 1, 0),
wage = ifelse(is.na(wage), 0, wage)
) #%>% filter(wage > 0)
y1_s <- data$select
x1_s <- model.matrix( ~ -1 + education + age + married + children, data = data)
y1 <- data$wage
x1 <- model.matrix( ~ -1 + education + age, data = data)
#####
dt <- list(
q = 0.5,
N=length(y1),
K1=dim(x1)[2],
y=y1,
X=x1,
K2=dim(x1_s)[2],
S=y1_s,
Z=x1_s
)
This give coefficients close to the ones provided by Stata. The intercepts not so much, tho. Sign are correct.
Stata results with the provided dataset
arhomesample.csv (9.6 KB)
:
------------------------------------------------------------------------------
wage | Coefficient
-------------+----------------------------------------------------------------
select |
education | .0488129
age | .0498648
married | .3382993
children | .5919981
_cons | -3.035758
-------------+----------------------------------------------------------------
.5_quantile |
cons | 3.855848
education | .8755162
age | .1761324
-------------+----------------------------------------------------------------
_anc |
rho | -.5903345