What's the highest dimensional model Stan can fit using NUTS?

Program and data are not secret. I was just in a hurry when writing that post.

Data is Prostate_GE listed at http://featureselection.asu.edu/datasets.php

The model is described in Piironen and Vehtari (2017) https://projecteuclid.org/euclid.ejs/1513306866. The code is slightly modified from appendix C.1. There are less transformed parameters saved and bernoulli_logit_glm() is used instead of bernoulli_logit() (this gives about 4x speedup).

bernoulli_logit_glm_rhs.stan code is

data {
  int<lower=0> n;				      // number of observations
  int<lower=0> d;             // number of predictors
  int<lower=0,upper=1> y[n];	// outputs
  matrix[n,d] x;				      // inputs
  real<lower=0> scale_icept;	// prior std for the intercept
  real<lower=0> scale_global;	// scale for the half-t prior for tau
  real<lower=1> nu_global;	  // degrees of freedom for the half-t priors for tau
  real<lower=1> nu_local;		  // degrees of freedom for the half-t priors for lambdas
                              // (nu_local = 1 corresponds to the horseshoe)
  real<lower=0> slab_scale;   // for the regularized horseshoe
  real<lower=0> slab_df;
}

parameters {
  real beta0;
  vector[d] z;                // for non-centered parameterization
  real <lower=0> tau;         // global shrinkage parameter
  vector <lower=0>[d] lambda; // local shrinkage parameter
  real<lower=0> caux;
}

transformed parameters {
  vector[d] beta;                     // regression coefficients
  {
    vector[d] lambda_tilde;   // 'truncated' local shrinkage parameter
    real c = slab_scale * sqrt(caux); // slab scale
    lambda_tilde = sqrt( c^2 * square(lambda) ./ (c^2 + tau^2*square(lambda)));
    beta = z .* lambda_tilde*tau;
  }
}

model {
  // half-t priors for lambdas and tau, and inverse-gamma for c^2
  z ~ std_normal();
  lambda ~ student_t(nu_local, 0, 1);
  tau ~ student_t(nu_global, 0, scale_global*2);
  caux ~ inv_gamma(0.5*slab_df, 0.5*slab_df);
  beta0 ~ normal(0, scale_icept);
  
  y ~ bernoulli_logit_glm(x, beta0, beta);
}
generated quantities {
  vector[n] f = beta0 + x*beta;
  vector[n] log_lik;
  for (i in 1:n)
    log_lik[i] = bernoulli_logit_glm_lpmf({y[i]} | [x[i]], beta0, beta);
}

R code for preparing data and running the model

prostate <- read.csv("prostate.csv", header=FALSE)
y<-prostate[,5967]
x<-prostate[,1:5966]

n = nrow(x);
p = ncol(x);

## regularized horseshoe prior
p0 <- 5
sigma <- sqrt(1/mean(y)/(1-mean(y)))
sigma <- 1
tau0 <- p0/(p-p0)*sigma/sqrt(n)
## data
data <- list(n = n, d = ncol(x), x = x, y = y,
             nu_local = 1, nu_global = 1, scale_global = tau0,
             scale_icept=5, slab_scale=2, slab_df=100)

fitpb1 <- stan("bernoulli_logit_glm_rhs.stan", data=data, chains=4, iter=3000, warmup=1000,
             control=list(adapt_delta = 0.8, max_treedepth=10), cores=2)
6 Likes