Hi folks,
I have a model that consistently returns low Bayesian Fraction of Missing Information warnings on both real and simulated data. My pairs plot suggests that the shape and rate parameters of the gamma distribution are contributing to the low BFMI; however, I’ve struggled to reparameterize the distribution in a way which resolves the issue. I have attempted the parameterization described here, which does seem to reduce the correlation between the parameters of the gamma distribution, but still produces a BFMI warning.
Would you have any advice on how to reparameterize the gamma distribution (or take other steps) to avoid the low BFMI warning?
Model code:
data {
int<lower=0> n_obs_known;
vector[n_obs_known] HAT_known;
int<lower=0> n_obs_unknown;
vector[n_obs_unknown] HAT_unknown;
}
parameters {
real mu_bias;
real<lower=0> sigma_error;
real<lower=0, upper=1> flight_prior;
real<lower=0> shape;
real<lower=0> rate;
real<lower=0> real_alt[n_obs_unknown];
}
transformed parameters {
real unknown_q[n_obs_unknown, 2];
for(i in 1:n_obs_unknown){
// Marginalized discrete parameter (https://mc-stan.org/docs/stan-users-guide/latent-discrete.html)
// Measuring the likelihood that a given location is in a ground or flight state
unknown_q[i, 1] = normal_lpdf(HAT_unknown[i]| mu_bias, sigma_error) + log1m(flight_prior);
unknown_q[i, 2] = normal_lpdf(HAT_unknown[i]| real_alt[i] + mu_bias, sigma_error) + log(flight_prior);
}
}
model {
// likelihood of known locations
target += normal_lpdf(HAT_known| mu_bias, sigma_error);
// likelihood of unknown locations
for (i in 1:n_obs_unknown){
target += log_sum_exp(unknown_q[i, 1], unknown_q[i, 2]);
}
//describing the altitude distribution
real_alt ~ gamma(shape, rate);
//priors
mu_bias ~ normal(0, 1); // can be negative
sigma_error ~ uniform(0, 1); // cannot be negative
flight_prior ~ beta(30.5, 60); // informative prior: mean = 0.33, sd = 0.05
shape ~ normal(0, 5) T[0,];
rate ~ normal(0, 10) T[0,];
}
R code to generate the simulated data:
library(dplyr)
library(purrr)
library(rstan)
options(mc.cores = 4)
# Known ground locations
known_ground <- 13000
measurement_error <- 50/2200
known_ground_df <- tibble(HAT = rnorm(n=known_ground, mean=0, sd=measurement_error), HAT_index = rep(1, known_ground))
# Potential flight locations
unknown_flight <- 141
unknown_ground <- 287
shape <- 1.25
rate <- 7.82
unknown_flight_df <- tibble(HAT = rgamma(unknown_flight, shape = shape, rate = rate), HAT_index = rep(1, unknown_flight))
unknown_flight_df$HAT <- unknown_flight_df$HAT %>%
map(.f = function(x){ # incorporate measurement error
rnorm(n = 1, mean = x, sd = measurement_error)
}) %>%
unlist()
unknown_ground_df <- tibble(HAT = rnorm(n=unknown_ground, mean=0, sd=measurement_error), HAT_index = rep(0, unknown_ground))
unknown_df <- bind_rows(unknown_flight_df, unknown_ground_df)
# Compile and fit
model_compiled <- stan_model("stan_model.stan")
init <- function(){list(mu_bias = rnorm(1,0,0.2),
sigma_error = runif(1,0,0.2),
shape = runif(1,3,5),
rate = runif(1,5,10),
flight_prior = 0.33)}
fit <- sampling(model_compiled, data = list(n_obs_known = nrow(known_ground_df),
HAT_known = known_ground_df$HAT,
n_obs_unknown = nrow(unknown_df),
HAT_unknown = unknown_df$HAT),
init = init,
pars = c("mu_bias", "sigma_error", "shape", "rate", "flight_prior"),
iter = 15000,
chains = 4)