Hi everyone,
we try to build a linear regression model with three outcomes but we have a problem with including the (regularized) horseshoe prior.
There is no problem with incorporating the horseshoe prior for the univariate case or the multiple outcome setting without horseshoe but I’m struggling with bringing these two together.
I very simple example with toy data would be the following:
set.seed(2409)
library(rstan)
library(dplyr)
library(MASS)
N <- 200 #number of observations
M <- 250 #number of predictors
M0 <- 75 #number of predictors that have a significant effect
#true values
intercept <- c(0.5, 0, -0.5)
beta <- list()
beta[[1]] <- c(rep(0.5, M0), rep(0, M-M0)) #true effects of predictors for y1
beta[[2]] <- c(rep(-0.5, M0), rep(0, M-M0)) #true effects of predictors for y1
beta[[3]] <- c(rep(0.5, M0), rep(0, M-M0)) #true effects of predictors for y1
sigma1<-0.2 #sigma for y1
sigma2<-0.5 #sigma for y2
sigma3<-0.7 #sigma for y3
rho<-0.3 #correlation across sigmas
##this creates the errors with correlations across equations
eps<-mvrnorm(N,mu=c(0,0,0),Sigma=cbind(c(1,rho,rho), c(rho,1,rho),c(rho, rho, 1)))
e1<-eps[,1]*sigma1
e2<-eps[,2]*sigma2
e3<-eps[,3]*sigma3
X <- matrix(rnorm(N*M),N,M) %>%
data.matrix()
y1 <- intercept[1] + X %*% beta[[1]] + e1
y2 <- intercept[2] + X %*% beta[[2]] + e2
y3 <- intercept[3] + X %*% beta[[3]] + e3
Like I said, running the model for the univariate case is no problem. The model code is the following:
code <- "data {
int <lower=1> N; // number of obs
int <lower=1> M; // number of predictor variables
matrix[N,M] X; // predictor matrix
vector[N] y;//observed outcome
//horseshoe prior
real<lower=0> scale_global;
real<lower=1> nu_global;
real<lower=1> nu_local;
real<lower=0> slab_scale;
real<lower=1> slab_df;
}
parameters {
//coefficients
real alpha; //intercept
vector[M] beta_hs_z ; // effects of predictor (z_scale)
//horseshoe
real<lower=0> aux1_global;
real<lower=0> aux2_global;
vector<lower=0>[M] aux1_local;
vector<lower=0>[M] aux2_local;
real<lower=0> caux;
//scales
real<lower=0,upper=pi()/2> sigma_y_unif; // scale for outcome (cauchy reparameterization)
}
transformed parameters {
vector[M] beta_hs; // effects of predictor
real<lower=0> sigma_y; // scale for outcome
//horseshoe
real<lower=0> tau;
vector<lower=0>[M] lambda_hs;
vector<lower=0>[M] lambda_hs_tilde;
real<lower=0> c;
sigma_y = 2.5 * tan(sigma_y_unif);
tau = aux1_global * sqrt(aux2_global) * scale_global*sigma_y;
c = slab_scale * sqrt(caux);
lambda_hs = aux1_local .* sqrt(aux2_local);
lambda_hs_tilde = sqrt( c^2 * square(lambda_hs) ./ (c^2 + tau^2*square(lambda_hs)) );
beta_hs = beta_hs_z .*lambda_hs_tilde*tau;
}
model {
vector[N] mu;
//mu for model
mu = alpha + X*beta_hs;
//priors
//coefficients
alpha ~ normal(0, 1);
beta_hs_z ~ normal(0, 1);
//horseshoe
aux1_local ~ normal(0, 1);
aux2_local ~ inv_gamma (0.5* nu_local , 0.5* nu_local );
aux1_global ~ normal(0, 1);
aux2_global ~ inv_gamma (0.5* nu_global , 0.5* nu_global );
caux ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
//scale
//sigma_y_unif samples from a uniform distribution automatically
//likelihood
y ~ normal(mu, sigma_y) ;
}"
model <- stan(model_code=code,
data = list(N = N,
M = M,
X = X,
y = as.numeric(y1),
scale_global = (M0)/(((M-M0))*sqrt(N)),
nu_global = 1,
nu_local = 1,
slab_scale = 1,
slab_df = 2),
chains = 10,
cores = 10,
iter = 10000,
warmup = 1000,
seed = 2409,
control=list(adapt_delta=0.999, stepsize=0.001, max_treedepth=18))
The model, this example is for y_1,recovers the parameters pretty good.
Now I can extent the model to the multivariate case and can also use the horseshoe prior for one of the equations (e.g. y_1). To ensure identifiability for the “non-horseshoe equations” I set M to e.g. M = 100 and M_0 = 50. The stan code for this example is the following:
code <- "data {
int <lower=1> N; // number of obs
int <lower=1> M; // number of predictors
matrix[N,M] X;//predictor matrix
vector[N] y1;//observed outcome
vector[N] y2;//observed outcome
vector[N] y3;//observed outcome
//horseshoe prior
real<lower=0> scale_global;
real<lower=1> nu_global;
real<lower=1> nu_local;
real<lower=0> slab_scale;
real<lower=1> slab_df;
}
transformed data {
vector[3] y[N];
for (n in 1:N) {
y[n] = [y1[n], y2[n], y3[n]]';
}
}
parameters {
//coefficients
real alpha[3];
vector[M] beta_hs_y1_z;
vector[M] beta_y2;
vector[M] beta_y3;
//horseshoe
real<lower=0> aux1_global_y1;
real<lower=0> aux2_global_y1;
vector<lower=0>[M] aux1_local_y1;
vector<lower=0>[M] aux2_local_y1;
real<lower=0> caux_y1;
//scales
vector <lower=0, upper=pi()/2> [3] sigma_y_unif;
cholesky_factor_corr[3] L_Omega;
}
transformed parameters {
vector[M] beta_hs_y1;
real<lower=0> tau_y1;
vector<lower=0>[M] lambda_hs_y1;
vector<lower=0>[M] lambda_hs_tilde_y1;
real<lower=0> c_y1;
vector <lower=0> [3] sigma_y;
sigma_y = 2.5 * tan(sigma_y_unif);
//horseshoe
tau_y1 = aux1_global_y1 * sqrt(aux2_global_y1) * scale_global*sigma_y[1];
c_y1 = slab_scale * sqrt(caux_y1);
lambda_hs_y1 = aux1_local_y1 .* sqrt(aux2_local_y1);
lambda_hs_tilde_y1 = sqrt( c_y1^2 * square(lambda_hs_y1) ./ (c_y1^2 + tau_y1^2*square(lambda_hs_y1)) );
beta_hs_y1 = beta_hs_y1_z .*lambda_hs_tilde_y1*tau_y1;
}
model {
vector[N] mu1;
vector[N] mu2;
vector[N] mu3;
matrix[3, 3] LSigma;
vector[3] mu[N];
mu1 = alpha[1] + X * beta_hs_y1;
mu2 = alpha[2] + X * beta_y2;
mu3 = alpha[3] + X * beta_y3;
for (n in 1:N) {
mu[n] = [mu1[n], mu2[n], mu3[n]]';
}
LSigma = diag_pre_multiply(sigma_y, L_Omega);
//priors
//coefficients
alpha ~ normal(0, 1);
beta_hs_y1_z ~ normal(0, 1);
beta_y2 ~ normal(0, 1);
beta_y3 ~ normal(0, 1);
//horseshoe
aux1_local_y1 ~ normal(0, 1);
aux2_local_y1 ~ inv_gamma (0.5* nu_local , 0.5* nu_local );
aux1_global_y1 ~ normal(0, 1);
aux2_global_y1 ~ inv_gamma (0.5* nu_global , 0.5* nu_global );
caux_y1 ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
//scale
L_Omega~lkj_corr_cholesky(3);
//likelihood
y ~ multi_normal_cholesky(mu, LSigma);
}
generated quantities {
corr_matrix[3] Residual_cors = multiply_lower_tri_self_transpose(L_Omega);
vector<lower=-1,upper=1>[3] residual_cors;
for (l in 1:3) {
for (j in 1:(l - 1)) {
residual_cors[choose(l - 1, 2) + j] = Residual_cors[j, l];
}
}
}"
model <- stan(model_code=code,
data = list(N = N,
M = M,
X = X,
y1 = as.numeric(y1),
y2 = as.numeric(y2),
y3 = as.numeric(y3),
scale_global = (M0)/((M-M0)*sqrt(N)),
nu_global = 1,
nu_local = 1,
slab_scale = 1,
slab_df = 2),
chains = 4,
cores = 4,
iter = 2000,
warmup = 1000,
control=list(adapt_delta=0.999, stepsize=0.001, max_treedepth=18))
The results are as expected:
iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1] 0.51 0 0.02 0.47 0.49 0.51 0.52 0.54 5749 1
alpha[2] -0.05 0 0.04 -0.13 -0.08 -0.05 -0.02 0.03 3747 1
alpha[3] -0.51 0 0.07 -0.65 -0.55 -0.51 -0.46 -0.38 3629 1
beta_hs_y1[1] 0.49 0 0.02 0.45 0.48 0.49 0.50 0.53 3787 1
beta_hs_y1[2] 0.47 0 0.02 0.44 0.46 0.47 0.49 0.51 3890 1
beta_hs_y1[3] 0.47 0 0.02 0.43 0.45 0.47 0.48 0.51 3676 1
beta_hs_y1[4] 0.49 0 0.02 0.46 0.48 0.49 0.51 0.53 3901 1
beta_hs_y1[5] 0.50 0 0.02 0.46 0.49 0.50 0.51 0.54 3739 1
beta_hs_y1[6] 0.50 0 0.02 0.46 0.49 0.50 0.51 0.54 3890 1
beta_hs_y1[7] 0.49 0 0.02 0.45 0.48 0.49 0.51 0.53 3266 1
beta_hs_y1[8] 0.51 0 0.02 0.47 0.49 0.51 0.52 0.54 3609 1
beta_hs_y1[9] 0.47 0 0.02 0.44 0.46 0.47 0.48 0.50 3844 1
beta_hs_y1[10] 0.51 0 0.02 0.48 0.50 0.51 0.53 0.55 4131 1
...
beta_hs_y1[40] 0.49 0 0.02 0.45 0.48 0.49 0.50 0.52 3547 1
beta_hs_y1[41] 0.52 0 0.02 0.49 0.51 0.52 0.54 0.56 3714 1
beta_hs_y1[42] 0.48 0 0.02 0.44 0.47 0.49 0.50 0.53 3869 1
beta_hs_y1[43] 0.48 0 0.02 0.45 0.47 0.48 0.49 0.51 3924 1
beta_hs_y1[44] 0.53 0 0.02 0.49 0.52 0.53 0.55 0.58 3755 1
beta_hs_y1[45] 0.48 0 0.02 0.45 0.47 0.48 0.49 0.52 4092 1
beta_hs_y1[46] 0.51 0 0.02 0.48 0.50 0.51 0.52 0.55 3836 1
beta_hs_y1[47] 0.48 0 0.02 0.45 0.47 0.49 0.50 0.52 4223 1
beta_hs_y1[48] 0.47 0 0.02 0.43 0.45 0.47 0.48 0.50 3903 1
beta_hs_y1[49] 0.50 0 0.02 0.46 0.49 0.50 0.51 0.54 3950 1
beta_hs_y1[50] 0.49 0 0.02 0.45 0.48 0.49 0.50 0.52 4004 1
beta_hs_y1[51] -0.01 0 0.02 -0.05 -0.02 -0.01 0.00 0.02 3350 1
beta_hs_y1[52] 0.01 0 0.02 -0.02 0.00 0.01 0.02 0.04 3761 1
beta_hs_y1[53] 0.00 0 0.02 -0.03 -0.01 0.00 0.01 0.04 4502 1
beta_hs_y1[54] 0.00 0 0.02 -0.04 -0.01 0.00 0.01 0.03 3858 1
beta_hs_y1[55] 0.03 0 0.02 0.00 0.02 0.03 0.05 0.07 3582 1
beta_hs_y1[56] -0.01 0 0.02 -0.05 -0.02 -0.01 0.00 0.01 2785 1
beta_hs_y1[57] -0.02 0 0.02 -0.05 -0.03 -0.01 0.00 0.02 3164 1
beta_hs_y1[58] -0.01 0 0.02 -0.05 -0.03 -0.01 0.00 0.02 3146 1
beta_hs_y1[59] 0.01 0 0.02 -0.02 0.00 0.01 0.02 0.04 3804 1
beta_hs_y1[60] 0.01 0 0.02 -0.02 0.00 0.01 0.02 0.05 3294 1
...
beta_y2[1] -0.53 0 0.05 -0.62 -0.56 -0.53 -0.50 -0.44 3329 1
beta_y2[2] -0.55 0 0.04 -0.64 -0.58 -0.55 -0.53 -0.47 5060 1
beta_y2[3] -0.47 0 0.05 -0.57 -0.51 -0.47 -0.44 -0.37 2970 1
beta_y2[4] -0.54 0 0.04 -0.63 -0.57 -0.54 -0.51 -0.45 3341 1
beta_y2[5] -0.54 0 0.05 -0.64 -0.57 -0.54 -0.51 -0.45 3053 1
beta_y2[6] -0.46 0 0.04 -0.54 -0.49 -0.46 -0.43 -0.37 4170 1
beta_y2[7] -0.49 0 0.05 -0.59 -0.52 -0.49 -0.46 -0.39 2994 1
beta_y2[8] -0.43 0 0.04 -0.51 -0.46 -0.43 -0.40 -0.35 4260 1
beta_y2[9] -0.54 0 0.04 -0.62 -0.57 -0.54 -0.52 -0.47 4219 1
beta_y2[10] -0.50 0 0.05 -0.60 -0.53 -0.50 -0.47 -0.40 3654 1
...
beta_y2[40] -0.51 0 0.04 -0.60 -0.54 -0.51 -0.48 -0.43 3744 1
beta_y2[41] -0.49 0 0.05 -0.57 -0.52 -0.49 -0.46 -0.39 3964 1
beta_y2[42] -0.50 0 0.05 -0.60 -0.53 -0.50 -0.46 -0.40 2723 1
beta_y2[43] -0.59 0 0.04 -0.67 -0.61 -0.59 -0.56 -0.51 3365 1
beta_y2[44] -0.49 0 0.05 -0.59 -0.53 -0.49 -0.46 -0.39 3382 1
beta_y2[45] -0.50 0 0.04 -0.58 -0.53 -0.50 -0.47 -0.41 3724 1
beta_y2[46] -0.48 0 0.04 -0.56 -0.51 -0.48 -0.45 -0.40 4039 1
beta_y2[47] -0.52 0 0.04 -0.61 -0.55 -0.52 -0.49 -0.44 3820 1
beta_y2[48] -0.49 0 0.04 -0.58 -0.52 -0.49 -0.46 -0.40 3484 1
beta_y2[49] -0.47 0 0.05 -0.56 -0.50 -0.47 -0.44 -0.38 3300 1
beta_y2[50] -0.42 0 0.04 -0.50 -0.45 -0.42 -0.40 -0.34 3483 1
beta_y2[51] -0.07 0 0.04 -0.16 -0.10 -0.07 -0.04 0.02 3356 1
beta_y2[52] 0.02 0 0.04 -0.06 -0.01 0.02 0.05 0.11 3202 1
beta_y2[53] 0.09 0 0.04 0.01 0.06 0.09 0.12 0.18 3520 1
beta_y2[54] 0.02 0 0.04 -0.07 -0.01 0.02 0.05 0.11 3269 1
beta_y2[55] 0.04 0 0.04 -0.04 0.01 0.04 0.07 0.12 3730 1
beta_y2[56] 0.02 0 0.04 -0.05 0.00 0.02 0.05 0.10 3695 1
beta_y2[57] -0.06 0 0.05 -0.15 -0.09 -0.06 -0.03 0.03 2860 1
beta_y2[58] -0.03 0 0.04 -0.11 -0.05 -0.03 0.00 0.06 4485 1
beta_y2[59] -0.05 0 0.05 -0.14 -0.08 -0.05 -0.02 0.04 3897 1
beta_y2[60] 0.01 0 0.04 -0.08 -0.02 0.01 0.04 0.10 3690 1
...
beta_y3[1] 0.51 0 0.07 0.36 0.46 0.51 0.56 0.65 3181 1
beta_y3[2] 0.53 0 0.07 0.39 0.48 0.53 0.58 0.66 5689 1
beta_y3[3] 0.58 0 0.09 0.41 0.52 0.58 0.64 0.75 3351 1
beta_y3[4] 0.56 0 0.07 0.42 0.51 0.56 0.61 0.70 3376 1
beta_y3[5] 0.51 0 0.08 0.36 0.46 0.51 0.56 0.66 2536 1
beta_y3[6] 0.47 0 0.07 0.33 0.42 0.47 0.52 0.61 3779 1
beta_y3[7] 0.50 0 0.08 0.35 0.45 0.50 0.56 0.66 3133 1
beta_y3[8] 0.50 0 0.07 0.37 0.46 0.50 0.55 0.64 4461 1
beta_y3[9] 0.51 0 0.07 0.38 0.47 0.51 0.56 0.64 4831 1
beta_y3[10] 0.50 0 0.08 0.35 0.45 0.50 0.56 0.66 3576 1
...
beta_y3[40] 0.44 0 0.07 0.30 0.39 0.44 0.48 0.57 3483 1
beta_y3[41] 0.54 0 0.07 0.40 0.49 0.54 0.59 0.69 3832 1
beta_y3[42] 0.49 0 0.08 0.32 0.43 0.49 0.55 0.65 3437 1
beta_y3[43] 0.43 0 0.07 0.30 0.38 0.43 0.47 0.57 3612 1
beta_y3[44] 0.54 0 0.08 0.37 0.48 0.53 0.59 0.70 3141 1
beta_y3[45] 0.47 0 0.07 0.33 0.42 0.47 0.52 0.61 3852 1
beta_y3[46] 0.57 0 0.07 0.43 0.52 0.57 0.62 0.70 4059 1
beta_y3[47] 0.43 0 0.07 0.29 0.39 0.43 0.48 0.58 3949 1
beta_y3[48] 0.53 0 0.07 0.38 0.48 0.52 0.57 0.67 3965 1
beta_y3[49] 0.46 0 0.08 0.30 0.40 0.46 0.51 0.61 3595 1
beta_y3[50] 0.47 0 0.07 0.34 0.42 0.47 0.52 0.61 3555 1
beta_y3[51] -0.10 0 0.07 -0.25 -0.15 -0.10 -0.05 0.04 3497 1
beta_y3[52] -0.05 0 0.07 -0.19 -0.09 -0.04 0.00 0.10 3083 1
beta_y3[53] 0.08 0 0.07 -0.06 0.03 0.08 0.13 0.21 3893 1
beta_y3[54] -0.01 0 0.07 -0.15 -0.06 -0.01 0.04 0.14 3637 1
beta_y3[55] 0.09 0 0.07 -0.05 0.04 0.09 0.13 0.22 3642 1
beta_y3[56] -0.01 0 0.06 -0.14 -0.05 -0.01 0.04 0.12 4340 1
beta_y3[57] -0.12 0 0.08 -0.26 -0.17 -0.12 -0.07 0.03 3398 1
beta_y3[58] -0.16 0 0.07 -0.31 -0.21 -0.16 -0.12 -0.03 3908 1
beta_y3[59] 0.03 0 0.07 -0.11 -0.02 0.03 0.08 0.17 4774 1
beta_y3[60] 0.07 0 0.07 -0.07 0.02 0.07 0.12 0.21 3415 1
...
sigma_y[1] 0.19 0 0.01 0.17 0.18 0.19 0.20 0.22 3111 1
sigma_y[2] 0.44 0 0.03 0.39 0.42 0.44 0.46 0.51 2130 1
sigma_y[3] 0.72 0 0.05 0.63 0.69 0.72 0.76 0.84 1968 1
Residual_cors[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
Residual_cors[1,2] 0.14 0 0.09 -0.04 0.08 0.14 0.20 0.32 5964 1
Residual_cors[1,3] 0.16 0 0.09 -0.03 0.09 0.16 0.22 0.33 5585 1
Residual_cors[2,1] 0.14 0 0.09 -0.04 0.08 0.14 0.20 0.32 5964 1
Residual_cors[2,2] 1.00 0 0.00 1.00 1.00 1.00 1.00 1.00 3994 1
Residual_cors[2,3] 0.09 0 0.10 -0.10 0.03 0.10 0.16 0.28 1778 1
Residual_cors[3,1] 0.16 0 0.09 -0.03 0.09 0.16 0.22 0.33 5585 1
Residual_cors[3,2] 0.09 0 0.10 -0.10 0.03 0.10 0.16 0.28 1778 1
Residual_cors[3,3] 1.00 0 0.00 1.00 1.00 1.00 1.00 1.00 3845 1
residual_cors[1] 0.14 0 0.09 -0.04 0.08 0.14 0.20 0.32 5964 1
residual_cors[2] 0.16 0 0.09 -0.03 0.09 0.16 0.22 0.33 5585 1
residual_cors[3] 0.09 0 0.10 -0.10 0.03 0.10 0.16 0.28 1778 1
Samples were drawn using NUTS(diag_e) at Tue Mar 24 10:56:25 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Now I naively believed that in order to use the horseshoe prior in all the equations I would only have to copy-paste the code for y_1 for the other equations:
code <- "data {
int <lower=1> N; // number of obs
int <lower=1> M; // number of predictors
matrix[N,M] X;//predictor matrix
vector[N] y1;//observed outcome
vector[N] y2;//observed outcome
vector[N] y3;//observed outcome
//horseshoe prior
real<lower=0> scale_global;
real<lower=1> nu_global;
real<lower=1> nu_local;
real<lower=0> slab_scale;
real<lower=1> slab_df;
}
transformed data {
vector[3] y[N];
for (n in 1:N) {
y[n] = [y1[n], y2[n], y3[n]]';
}
}
parameters {
//coefficients
real alpha[3];
vector[M] beta_hs_y1_z;
vector[M] beta_hs_y2_z;
vector[M] beta_hs_y3_z;
//horseshoe
//y1
real<lower=0> aux1_global_y1;
real<lower=0> aux2_global_y1;
vector<lower=0>[M] aux1_local_y1;
vector<lower=0>[M] aux2_local_y1;
real<lower=0> caux_y1;
//y2
real<lower=0> aux1_global_y2;
real<lower=0> aux2_global_y2;
vector<lower=0>[M] aux1_local_y2;
vector<lower=0>[M] aux2_local_y2;
real<lower=0> caux_y2;
//y3
real<lower=0> aux1_global_y3;
real<lower=0> aux2_global_y3;
vector<lower=0>[M] aux1_local_y3;
vector<lower=0>[M] aux2_local_y3;
real<lower=0> caux_y3;
//scales
vector <lower=0, upper=pi()/2> [3] sigma_y_unif;
cholesky_factor_corr[3] L_Omega;
}
transformed parameters {
vector[M] beta_hs_y1;
real<lower=0> tau_y1;
vector<lower=0>[M] lambda_hs_y1;
vector<lower=0>[M] lambda_hs_tilde_y1;
real<lower=0> c_y1;
vector[M] beta_hs_y2;
real<lower=0> tau_y2;
vector<lower=0>[M] lambda_hs_y2;
vector<lower=0>[M] lambda_hs_tilde_y2;
real<lower=0> c_y2;
vector[M] beta_hs_y3;
real<lower=0> tau_y3;
vector<lower=0>[M] lambda_hs_y3;
vector<lower=0>[M] lambda_hs_tilde_y3;
real<lower=0> c_y3;
vector <lower=0> [3] sigma_y;
sigma_y = 2.5 * tan(sigma_y_unif);
//horseshoe
//y1
tau_y1 = aux1_global_y1 * sqrt(aux2_global_y1) * scale_global*sigma_y[1];
c_y1 = slab_scale * sqrt(caux_y1);
lambda_hs_y1 = aux1_local_y1 .* sqrt(aux2_local_y1);
lambda_hs_tilde_y1 = sqrt( c_y1^2 * square(lambda_hs_y1) ./ (c_y1^2 + tau_y1^2*square(lambda_hs_y1)) );
beta_hs_y1 = beta_hs_y1_z .*lambda_hs_tilde_y1*tau_y1;
//y2
tau_y2 = aux1_global_y2 * sqrt(aux2_global_y2) * scale_global*sigma_y[2];
c_y2 = slab_scale * sqrt(caux_y2);
lambda_hs_y2 = aux1_local_y2 .* sqrt(aux2_local_y2);
lambda_hs_tilde_y2 = sqrt( c_y2^2 * square(lambda_hs_y2) ./ (c_y2^2 + tau_y2^2*square(lambda_hs_y2)) );
beta_hs_y2 = beta_hs_y2_z .*lambda_hs_tilde_y2*tau_y2;
//y3
tau_y3 = aux1_global_y3 * sqrt(aux2_global_y3) * scale_global*sigma_y[3];
c_y3 = slab_scale * sqrt(caux_y3);
lambda_hs_y3 = aux1_local_y3 .* sqrt(aux2_local_y3);
lambda_hs_tilde_y3 = sqrt( c_y3^2 * square(lambda_hs_y3) ./ (c_y3^2 + tau_y3^2*square(lambda_hs_y3)) );
beta_hs_y3 = beta_hs_y3_z .*lambda_hs_tilde_y3*tau_y3;
}
model {
vector[N] mu1;
vector[N] mu2;
vector[N] mu3;
matrix[3, 3] LSigma;
vector[3] mu[N];
mu1 = alpha[1] + X * beta_hs_y1;
mu2 = alpha[2] + X * beta_hs_y2;
mu3 = alpha[3] + X * beta_hs_y3;
for (n in 1:N) {
mu[n] = [mu1[n], mu2[n], mu3[n]]';
}
LSigma = diag_pre_multiply(sigma_y, L_Omega);
//priors
//coefficients
alpha ~ normal(0, 1);
beta_hs_y1_z ~ normal(0, 1);
beta_hs_y2_z ~ normal(0, 1);
beta_hs_y3_z ~ normal(0, 1);
//horseshoe
//y1
aux1_local_y1 ~ normal(0, 1);
aux2_local_y1 ~ inv_gamma (0.5* nu_local , 0.5* nu_local );
aux1_global_y1 ~ normal(0, 1);
aux2_global_y1 ~ inv_gamma (0.5* nu_global , 0.5* nu_global );
caux_y1 ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
//y2
aux1_local_y2 ~ normal(0, 1);
aux2_local_y2 ~ inv_gamma (0.5* nu_local , 0.5* nu_local );
aux1_global_y2 ~ normal(0, 1);
aux2_global_y2 ~ inv_gamma (0.5* nu_global , 0.5* nu_global );
caux_y2 ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
//y3
aux1_local_y3 ~ normal(0, 1);
aux2_local_y3 ~ inv_gamma (0.5* nu_local , 0.5* nu_local );
aux1_global_y3 ~ normal(0, 1);
aux2_global_y3 ~ inv_gamma (0.5* nu_global , 0.5* nu_global );
caux_y3 ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
//scale
L_Omega~lkj_corr_cholesky(3);
//likelihood
y ~ multi_normal_cholesky(mu, LSigma);
}
generated quantities {
corr_matrix[3] Residual_cors = multiply_lower_tri_self_transpose(L_Omega);
vector<lower=-1,upper=1>[3] residual_cors;
for (l in 1:3) {
for (j in 1:(l - 1)) {
residual_cors[choose(l - 1, 2) + j] = Residual_cors[j, l];
}
}
}"
model <- stan(model_code=code,
data = list(N = N,
M = M,
X = X,
y1 = as.numeric(y1),
y2 = as.numeric(y2),
y3 = as.numeric(y3),
scale_global = (M0)/((M-M0)*sqrt(N)),
nu_global = 1,
nu_local = 1,
slab_scale = 1,
slab_df = 2),
chains = 4,
cores = 4,
iter = 2000,
warmup = 1000,
control=list(adapt_delta=0.999, stepsize=0.001, max_treedepth=18))
Unfortunately the model seems to fail in this case.
Inference for Stan model: a770f8efe1edc8442df6f5e6daeed073.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1] 0.59 0 0.22 0.16 0.44 0.58 0.73 1.03 1851 1.00
alpha[2] -0.09 0 0.22 -0.54 -0.24 -0.09 0.06 0.33 1885 1.00
alpha[3] -0.47 0 0.22 -0.89 -0.62 -0.47 -0.32 -0.02 1922 1.00
beta_hs_y1[1] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3741 1.00
beta_hs_y1[2] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4248 1.00
beta_hs_y1[3] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3595 1.00
beta_hs_y1[4] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3528 1.00
beta_hs_y1[5] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.00 2430 1.00
beta_hs_y1[6] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4274 1.00
beta_hs_y1[7] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4207 1.00
beta_hs_y1[8] 0.00 0 0.01 0.00 0.00 0.00 0.00 0.01 3812 1.00
beta_hs_y1[9] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.00 3107 1.00
beta_hs_y1[10] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3464 1.00
...
beta_hs_y1[40] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3526 1.00
beta_hs_y1[41] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3651 1.00
beta_hs_y1[42] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3473 1.00
beta_hs_y1[43] 0.00 0 0.01 -0.03 0.00 0.00 0.00 0.00 1964 1.00
beta_hs_y1[44] 0.00 0 0.01 0.00 0.00 0.00 0.00 0.02 2830 1.00
beta_hs_y1[45] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4209 1.00
beta_hs_y1[46] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4365 1.00
beta_hs_y1[47] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4212 1.00
beta_hs_y1[48] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4398 1.00
beta_hs_y1[49] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3387 1.00
beta_hs_y1[50] 0.00 0 0.01 0.00 0.00 0.00 0.00 0.01 3045 1.00
beta_hs_y1[51] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4268 1.00
beta_hs_y1[52] 0.00 0 0.00 0.00 0.00 0.00 0.00 0.01 4180 1.00
beta_hs_y1[53] 0.00 0 0.00 0.00 0.00 0.00 0.00 0.01 3938 1.00
beta_hs_y1[54] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3872 1.00
beta_hs_y1[55] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4095 1.00
beta_hs_y1[56] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3981 1.00
beta_hs_y1[57] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 3717 1.00
beta_hs_y1[58] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3407 1.00
beta_hs_y1[59] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.00 3444 1.00
beta_hs_y1[60] 0.00 0 0.00 -0.01 0.00 0.00 0.00 0.01 4375 1.00
...
beta_hs_y2[1] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.04 2300 1.00
beta_hs_y2[2] 0.00 0 0.01 -0.03 0.00 0.00 0.00 0.01 2865 1.00
beta_hs_y2[3] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3733 1.00
beta_hs_y2[4] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3790 1.00
beta_hs_y2[5] -0.01 0 0.02 -0.08 0.00 0.00 0.00 0.01 1556 1.00
beta_hs_y2[6] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3080 1.00
beta_hs_y2[7] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4853 1.00
beta_hs_y2[8] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3771 1.00
beta_hs_y2[9] 0.00 0 0.02 -0.06 0.00 0.00 0.00 0.01 1903 1.00
beta_hs_y2[10] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3209 1.00
...
beta_hs_y2[40] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3836 1.00
beta_hs_y2[41] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3094 1.00
beta_hs_y2[42] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3035 1.00
beta_hs_y2[43] -0.02 0 0.04 -0.13 -0.02 0.00 0.00 0.00 962 1.01
beta_hs_y2[44] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3423 1.00
beta_hs_y2[45] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3352 1.00
beta_hs_y2[46] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.03 2603 1.00
beta_hs_y2[47] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3105 1.00
beta_hs_y2[48] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3484 1.00
beta_hs_y2[49] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3276 1.00
beta_hs_y2[50] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.04 2363 1.00
beta_hs_y2[51] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 4228 1.00
beta_hs_y2[52] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4430 1.00
beta_hs_y2[53] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3101 1.00
beta_hs_y2[54] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3936 1.00
beta_hs_y2[55] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3872 1.00
beta_hs_y2[56] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3185 1.00
beta_hs_y2[57] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 4116 1.00
beta_hs_y2[58] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4024 1.00
beta_hs_y2[59] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3485 1.00
beta_hs_y2[60] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3901 1.00
...
beta_hs_y3[1] 0.00 0 0.02 -0.01 0.00 0.00 0.00 0.05 2055 1.00
beta_hs_y3[2] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 2833 1.00
beta_hs_y3[3] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3022 1.00
beta_hs_y3[4] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4275 1.00
beta_hs_y3[5] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3745 1.00
beta_hs_y3[6] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 2269 1.00
beta_hs_y3[7] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3497 1.00
beta_hs_y3[8] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3891 1.00
beta_hs_y3[9] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4191 1.00
beta_hs_y3[10] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 4023 1.00
...
beta_hs_y3[40] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3757 1.00
beta_hs_y3[41] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3605 1.00
beta_hs_y3[42] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3728 1.00
beta_hs_y3[43] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3732 1.00
beta_hs_y3[44] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3242 1.00
beta_hs_y3[45] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3398 1.00
beta_hs_y3[46] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.03 2795 1.00
beta_hs_y3[47] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3901 1.00
beta_hs_y3[48] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3597 1.00
beta_hs_y3[49] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3265 1.00
beta_hs_y3[50] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3607 1.00
beta_hs_y3[51] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3240 1.00
beta_hs_y3[52] 0.00 0 0.01 -0.02 0.00 0.00 0.00 0.01 3333 1.00
beta_hs_y3[53] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 2883 1.00
beta_hs_y3[54] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3739 1.00
beta_hs_y3[55] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 2930 1.00
beta_hs_y3[56] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 3269 1.00
beta_hs_y3[57] 0.00 0 0.01 -0.03 0.00 0.00 0.00 0.01 3396 1.00
beta_hs_y3[58] 0.00 0 0.01 -0.04 0.00 0.00 0.00 0.01 2327 1.00
beta_hs_y3[59] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.02 2811 1.00
beta_hs_y3[60] 0.00 0 0.01 -0.01 0.00 0.00 0.00 0.01 3981 1.00
...
sigma_y[1] 3.27 0 0.16 2.99 3.16 3.27 3.38 3.60 1438 1.00
sigma_y[2] 3.31 0 0.16 3.02 3.20 3.31 3.43 3.65 1477 1.00
sigma_y[3] 3.34 0 0.16 3.03 3.22 3.33 3.45 3.67 1486 1.00
Residual_cors[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
Residual_cors[1,2] -0.99 0 0.00 -0.99 -0.99 -0.99 -0.99 -0.98 1833 1.00
Residual_cors[1,3] 0.98 0 0.00 0.97 0.98 0.98 0.98 0.98 2043 1.00
Residual_cors[2,1] -0.99 0 0.00 -0.99 -0.99 -0.99 -0.99 -0.98 1833 1.00
Residual_cors[2,2] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
Residual_cors[2,3] -0.96 0 0.01 -0.97 -0.97 -0.96 -0.96 -0.95 1960 1.00
Residual_cors[3,1] 0.98 0 0.00 0.97 0.98 0.98 0.98 0.98 2043 1.00
Residual_cors[3,2] -0.96 0 0.01 -0.97 -0.97 -0.96 -0.96 -0.95 1960 1.00
Residual_cors[3,3] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
residual_cors[1] -0.99 0 0.00 -0.99 -0.99 -0.99 -0.99 -0.98 1833 1.00
residual_cors[2] 0.98 0 0.00 0.97 0.98 0.98 0.98 0.98 2043 1.00
residual_cors[3] -0.96 0 0.01 -0.97 -0.97 -0.96 -0.96 -0.95 1960 1.00
Samples were drawn using NUTS(diag_e) at Tue Mar 24 09:41:03 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Does anyone have an idea how to implement the horseshoe prior in the multivariate setting correctly?
Thanks!