I took the sample code at the end of this paper, copied below:
data {
int<lower=0> n; // number of observations
int<lower=0> d; // number of predictors
vector[n] y; // outputs
matrix[n,d] x; // inputs
real<lower=0> scale_icept; // prior std for the intercept
real<lower=0> scale_global; // scale for the half-t prior for tau
// ( tau0 = scale_global*sigma )
real<lower=1> nu_global; // degrees of freedom for the half-t prior for tau
real<lower=1> nu_local; // degrees of freedom for the half-t priors for lambdas
// ( nu_local = 1 corresponds to the horseshoe )
}
parameters {
real beta0; // intercept
real logsigma; // log of noise std
// auxiliary variables that define the global and local parameters
vector[d] z;
real<lower=0> r1_global;
real<lower=0> r2_global;
vector<lower=0>[d] r1_local;
vector<lower=0>[d] r2_local;
}
transformed parameters {
real <lower=0> tau; // global shrinkage parameter
vector <lower=0>[d] lambda; // local shrinkage parameters
vector[d] beta; // regression coefficients
vector[n] f; // latent values
real sigma; // noise std
sigma = exp(logsigma);
lambda = r1_local .* sqrt(r2_local);
tau = r1_global * sqrt(r2_global);
beta = z .* lambda*tau;
f = beta0 + x*beta;
}
model {
// half -t priors for lambdas
z ~ normal (0, 1);
r1_local ~ normal (0.0 , 1.0);
r2_local ~ inv_gamma (0.5* nu_local , 0.5* nu_local);
// half -t prior for tau
r1_global ~ normal (0.0 , scale_global * sigma);
r2_global ~ inv_gamma (0.5* nu_global , 0.5* nu_global);
// gaussian prior for the intercept
beta0 ~ normal (0, scale_icept);
// observation model
y ~ normal (f, sigma);
}
I have simulated 30 observations from 20 non-zero coefficients and 180 zeros as follows:
# simulation function
normsim <- function(n, betas, disp){
x <- apply(t(rep(n,length(betas)-1)), 2, rnorm)
mu <- betas[1] + x%*%betas[-1]
y <- rnorm(n, mu, sqrt(disp))
data.frame(y=y,x)
}
# do it
n <- 30
betas <- c(2,
rep(10,10),
rep(-10,10),
rep(0,180))
disp <- 0.01
set.seed(2)
d <- normsim(n,betas,disp)
# estimation
# calculating recommended scale_global
p0 <- 20
D <- 180
sg <- p0/((D-p0)*sqrt(n))
list(n = length(d[,1]), d = ncol(d[,-1]),
y = d[,1], x = d[,-1],
scale_icept = 5, scale_global = sg,
nu_global = 1, nu_local = 1) -> d.stan
stan(file = "normal-hs.stan",
data = d.stan, iter = 2000, chains = 4,
control = list(adapt_delta = 0.991, max_treedepth = 11)) -> fit.hs.norm
I’m not sure what to do with adapt_delta and max_treedepth since any given set of values will alternate between not giving any issues and doing so. But regardless of anything I’ve tried so far, traceplots and coefficient posteriors aren’t looking good.
Is this just a case of not having enough information for proper estimation or are there any further adjustments that could be made?