…are we really missing poorly written model!? :-) That sounds like a solvable issue.
Below is Piironen and Vehtari’s Ponyshoe prior model. They have two parameterizations in their appendix - one good and one less good. Below is the bad one.
When I ran this script I got 592 divergence errors. The likelihood-sorted list of pairs of variables gave the following:
1: Combinations of the intercept and some other parameter. Pretty sure this is a false positive.
2: Combinations of a parameter + the same parameter, just transformed (i.e. x and i.e. ln x). So false positive, but easy to rule out.
3: the “lambda”-parameters. These should be reparameterised, and they are in Aki’s paper.
So yes - this method does give false positives, but also seem to flag problem parameters. See script below.
library(mvnfast)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rm(list=ls())
set.seed(123)
library(shinystan)
n <- 10
d <- 9
bvec <- rnorm(d)/rnorm(d)
x <- matrix(n*d, nrow=n, ncol=d)
y <- as.vector(x %*% bvec + rnorm(n))
p0 <-5
scale_icept<-100
scale_global<-p0/((d-p0)*sqrt(n))
nu_global <-1
nu_local <-1
slab_scale <-100
slab_df <-1
stanmodelcode<-"
data {
int < lower =0> n; # number of observations
int < lower =0> d; # number of predictors
vector [n] y; # outputs
matrix [n,d] x; # inputs
real < lower =0> scale_icept ; # prior std for the intercept
real < lower =0> scale_global ; # scale for the half -t prior for tau
real < lower =1> nu_global ; # degrees of freedom for the half -t priors for tau
real < lower =1> nu_local ; # degrees of freedom for the half -t priors for lambdas
real < lower =0> slab_scale ; # slab scale for the regularized horseshoe
real < lower =0> slab_df ; # slab degrees of freedom for the regularized horseshoe
}
parameters {
real logsigma ;
real beta0 ;
vector [d] z;
real < lower =0> tau; # global shrinkage parameter
vector < lower =0 >[d] lambda ; # local shrinkage parameter
real < lower =0> caux ;
}
transformed parameters {
real < lower =0> sigma ; # noise std
vector < lower =0 >[d] lambda_tilde ; # 'truncated ' local shrinkage parameter
real < lower =0> c; # slab scale
vector [d] beta ; # regression coefficients
vector [n] f; # latent function values
sigma = exp ( logsigma );
c = slab_scale * sqrt ( caux );
lambda_tilde = sqrt ( c^2 * square ( lambda ) ./ (c^2 + tau ^2* square ( lambda )) );
beta = z .* lambda_tilde *tau;
f = beta0 + x* beta ;
}
model {
# half -t priors for lambdas and tau , and inverse - gamma for c^2
z ~ normal (0, 1);
lambda ~ student_t ( nu_local , 0, 1);
tau ~ student_t ( nu_global , 0, scale_global * sigma );
caux ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
beta0 ~ normal (0, scale_icept );
y ~ normal (f, sigma );
}
"
f <- stan(model_code = stanmodelcode,
iter=2000,
control=list(max_treedepth=13),
chains=2
)
smpls <- as.data.frame(extract(f))
pars <- dim(smpls)[2]-1
normtests <- as.data.frame(matrix(NA, ncol=3,nrow = pars*(pars-1)/2))
colnames(normtests) <- c("par1", "par2", "loglik")
cnt<-1
for(i in 1:(pars-1)){
for(j in (i+1):pars){
parvar <- cbind(smpls[,i],smpls[,j])
normtests[cnt,1] <- names(smpls)[i]
normtests[cnt,2] <- names(smpls)[j]
tmp <- try(sum(dmvn(parvar,mu=colMeans(parvar), sigma=cov(parvar), log=T)))
if(inherits(tmp,"try-error")==F){
normtests[cnt,3]=tmp
}
cnt<-cnt+1
}
}
normtests <- normtests[order(normtests[,3]),]
head(normtests, n=20)