I’m dealing with some data for which a traditional gaussian model fails PPCs when checking skewness & kurtosis, so I’m modelling a latent gaussian connected to the data through the Yao-Johnson transform (a box-cox-like normality transform). @Bob_Carpenter provided the code for the transform, but to do PPCs I need to reverse it and I’m having trouble working out the math. Below is the model, the commented out g_reverse
function is what I’m looking to make, with it’s use in the commented-out section in generated quantities. Any help would be greatly appreciated!
functions {
// h: helper function for Yeo-Johnson transform (below)
real h(real a, real b) {
return (pow(a, b) - 1) / b;
}
// g: Yeo-Johnson transform (from: http://discourse.mc-stan.org/t/user-written-box-cox-type-transformation-function/1966/6)
real g(real y, real p) {
real eps = 0.005;
if (y >= 0) {
if (fabs(p) <= eps)
return log1p(y);
return h(1 + y, p);
}
if (p >= 2 - eps && p <= 2 + eps)
return -log1m(y);
return -h(1 - y, 2 - p);
}
//g_reverse: reverse operation from g()
// real g_reverse(real y_prime, real p) {
// ???
// }
}
data {
// N: number of cases
int<lower=1> N ;
// K: number of outcomes
int<lower=1> K ;
// Y: matrix of outcomes for each case & variable
matrix[N,K] Y ;
}
parameters {
// means: vector of population-level means for each latent gaussian outcome
vector[K] means ;
// sds: vector of population-level sds for each latent gaussian outcome
vector<lower=0>[K] sds ;
// cor: population-level correlations among latent gaussian outcomes
corr_matrix[K] cor ;
// lambdas: vector of normality transform parameter for each outcome
vector[K] lambdas ;
}
transformed parameters{
// Y_norm: latent gaussian outcomes
matrix[N,K] Y_norm ;
for(ni in 1:N){
for(ki in 1:K){
Y_norm[ni,ki] = g(Y[ni,ki],lambdas[ki]) ;
}
}
}
model {
//priors on parameters; presumes data has been scale()ed
means ~ normal(0,1) ;
sds ~ weibull(2,1) ;
cor ~ lkj_corr(1) ;
//lambdas ~ uniform(0,2) ; //commented out because this is implicit from bounded declaration of lambdas above
//assert sampling of case-level outcomes on each variable given multivariate normal population
// n.b. this part could be sped up considerably, but aiming for clarity at first.
for(ni in 1:N){
Y_norm[ni,] ~ multi_normal(means,quad_form_diag(cor,sds)) ;
}
}
generated quantities{
// Y_norm_rep: model-generated latent gaussian outcomes for PPCs
matrix[N,K] Y_norm_rep ;
// Y_rep: model-generated outcomes for PPCs
// matrix[N,K] Y_rep ;
for(ni in 1:N){
Y_norm_rep[ni,] = transpose(multi_normal_rng(means,quad_form_diag(cor,sds))) ;
// for(ki in 1:K){
// Y_rep[ni,ki] = g_reverse(Y_norm_rep[ni,ki]) ;
// }
}
}