Program and data are not secret. I was just in a hurry when writing that post.
Data is Prostate_GE listed at http://featureselection.asu.edu/datasets.php
The model is described in Piironen and Vehtari (2017) https://projecteuclid.org/euclid.ejs/1513306866. The code is slightly modified from appendix C.1. There are less transformed parameters saved and bernoulli_logit_glm()
is used instead of bernoulli_logit()
(this gives about 4x speedup).
bernoulli_logit_glm_rhs.stan
code is
data {
int<lower=0> n; // number of observations
int<lower=0> d; // number of predictors
int<lower=0,upper=1> y[n]; // outputs
matrix[n,d] x; // inputs
real<lower=0> scale_icept; // prior std for the intercept
real<lower=0> scale_global; // scale for the half-t prior for tau
real<lower=1> nu_global; // degrees of freedom for the half-t priors for tau
real<lower=1> nu_local; // degrees of freedom for the half-t priors for lambdas
// (nu_local = 1 corresponds to the horseshoe)
real<lower=0> slab_scale; // for the regularized horseshoe
real<lower=0> slab_df;
}
parameters {
real beta0;
vector[d] z; // for non-centered parameterization
real <lower=0> tau; // global shrinkage parameter
vector <lower=0>[d] lambda; // local shrinkage parameter
real<lower=0> caux;
}
transformed parameters {
vector[d] beta; // regression coefficients
{
vector[d] lambda_tilde; // 'truncated' local shrinkage parameter
real c = slab_scale * sqrt(caux); // slab scale
lambda_tilde = sqrt( c^2 * square(lambda) ./ (c^2 + tau^2*square(lambda)));
beta = z .* lambda_tilde*tau;
}
}
model {
// half-t priors for lambdas and tau, and inverse-gamma for c^2
z ~ std_normal();
lambda ~ student_t(nu_local, 0, 1);
tau ~ student_t(nu_global, 0, scale_global*2);
caux ~ inv_gamma(0.5*slab_df, 0.5*slab_df);
beta0 ~ normal(0, scale_icept);
y ~ bernoulli_logit_glm(x, beta0, beta);
}
generated quantities {
vector[n] f = beta0 + x*beta;
vector[n] log_lik;
for (i in 1:n)
log_lik[i] = bernoulli_logit_glm_lpmf({y[i]} | [x[i]], beta0, beta);
}
R code for preparing data and running the model
prostate <- read.csv("prostate.csv", header=FALSE)
y<-prostate[,5967]
x<-prostate[,1:5966]
n = nrow(x);
p = ncol(x);
## regularized horseshoe prior
p0 <- 5
sigma <- sqrt(1/mean(y)/(1-mean(y)))
sigma <- 1
tau0 <- p0/(p-p0)*sigma/sqrt(n)
## data
data <- list(n = n, d = ncol(x), x = x, y = y,
nu_local = 1, nu_global = 1, scale_global = tau0,
scale_icept=5, slab_scale=2, slab_df=100)
fitpb1 <- stan("bernoulli_logit_glm_rhs.stan", data=data, chains=4, iter=3000, warmup=1000,
control=list(adapt_delta = 0.8, max_treedepth=10), cores=2)