Hello,
I am trying to fit the zero-inflated beta regression of Tang et al., 2023 (Zero-Inflated Beta Distribution Regression Modeling | SpringerLink). This involves extending the Beta distribution to the user defined scale (-a,1). Implementation in STAN is proving tricky due to a couple of variables that need to be marginalised out (namely zi and W in the below).
An indicator variable zi is defined, denoting whether an observation y is truly 0:
- Where y > 0, zi = 0.
- Where y = 0, zi is modelled as a latent variable, under the rational that y may truly be 0, and thus zi = 1, or that y may be 0 due to chance, and thus zi = 0.
Zeros due to chance are considered to have arisen from the Beta component. So, instead of modelling y , V is modelled. Where y > 0, V is known. Where y = 0 and zi = 0, a latent Beta distributed V is sampled.
The model should look like (I don’t know LaTex, so excuse writing this in line):
a = 1 // Setting a to 1 gives even distribution either side of 0
c = (a / a + 1)
μ = α1 + β1X
φ = α2
π = α3 + β2X
A = μ * φ
B = (1 - μ) * φ
if(y > 0):
zi = 0
V = (y + a) / (a + 1)
V ~ Beta(A, B)
zi ~ Bernoulli(pi)
if(y = 0):
zi ~ Bernoulli( π / (π + (1 - π)P(V ≤ c)) )
if(zi = 0):
y = max(0,W)
W = (a + 1)V - a ∴ P(W ≤ 0) = P(V ≤ c)
V ~ Beta(A, B) T(0, c)
I believe I have figured out how to marginilise out zi, I am less confident about how to work around y = max(0,W). This is the code I have come up with at the moment.
The model compiles but I get errors about the shape parameters being 0 due to the initial values, which leads to log probabilities of -∞. I’ve tried setting initial values (which doesn’t work) but I doubt this is the root of the problem anyway. Interestingly though, these errors refer to the function for non-zero observations, but I’m far more unsure about the function for the zero observations.
functions{
/* Log-probability density of a beta distributed response with an upper bound */
real upper_bound_beta_lpdf(real y, real alpha, real beta_, real ub) {
real x = y / ub;
return beta_lpdf(x | alpha, beta_) - log(ub);
}
/* Log-probability density for observations where y > 0 */
real tang_zi_nonzero_y_beta_lpdf(real V, real mu, real phi, real pi_){
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
return bernoulli_logit_lpmf(0 | pi_) + beta_lpdf(V | shape[1], shape[2]);
}
/* Log-probability density for observations where y = 0 */
real tang_zi_zero_y_beta_lpdf(real V, real mu, real phi, real pi_, real c) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
real PVc = 1 - exp(beta_lccdf(c | shape[1], shape[2]));
real pi_p = inv_logit(pi_);
real PT0 = pi_p / (pi_p + (1 - pi_p) * PVc);
return log_sum_exp(bernoulli_lpmf(1 | PT0),
bernoulli_lpmf(0 | PT0) + upper_bound_beta_lpdf(V | shape[1], shape[2], c));
}
}
data {
int<lower=1> N; // Total number of observations
vector[N] Y; // Response variable
int<lower=1> nNonZero; // Number of non-zero observations
int obsNonZero[nNonZero]; // Indices of non-zero observations
int obsZero[N - nNonZero]; // Indices of zero observations
int<lower=1> K; // Number of population-level covariates for main model mean parameter
matrix[N, K] X; // Population-level design matrix for main model mean parameter
int<lower=1> K_pi; // Number of population-level covariates for zero-inflation component
matrix[N, K_pi] X_pi; // Population-level design matrix for zero-inflation component
real<lower=0> a; // Lower bound of extended Beta distribution
}
transformed data {
matrix[N, K] Xc; // Centered version of X
vector[K] means_X; // Column means of X before centering
matrix[N, K_pi] Xc_pi; // Centered version of X_pi
vector[K_pi] means_X_pi; // Column means of X_pi before centering
// Scale and centre predictor variables
for (i in 1:K) {
means_X[i] = mean(X[, i]);
Xc[, i] = (X[, i] - means_X[i]) / sd(X[, i]);
}
for (i in 1:K_pi) {
means_X_pi[i] = mean(X_pi[, i]);
Xc_pi[, i] = (X_pi[, i] - means_X_pi[i]) / sd(X_pi[, i]);
}
// Tang ZIBR data
int nZero = (N - nNonZero);
real<lower=0> c = (a / (a + 1)); // Threshold for whether y = 0 is truly 0; P(Y = 0) = P(V <= c)
real<lower=0,upper=1> Vy[nNonZero]; // Where Y > 0, V is known
for(i in 1:nNonZero){
Vy[i] = (Y[obsNonZero[i]] + a) / (a + 1);
}
}
parameters {
vector[K] b; // Coefficients
real Intercept; // Temporary intercept for centered predictors
real Intercept_phi; // Temporary intercept for centered predictors
vector[K_pi] b_pi; // pi regression coefficients
real Intercept_pi; // Temporary intercept for centered predictors
real<lower=0,upper=1> V0[nZero]; // Latent beta variable V for zero observations
}
model {
// Linear predictor terms
vector[N] mu = rep_vector(0.0, N);
vector[N] phi = rep_vector(0.0, N);
vector[N] pi_ = rep_vector(0.0, N);
mu += Intercept + X * b;
phi += Intercept_phi;
pi_ += Intercept_pi + X_pi * b_pi;
mu = inv_logit(mu);
phi = exp(phi);
for(n in 1:nNonZero){
target += tang_zi_nonzero_y_beta_lpdf(Vy[n] | mu[obsNonZero[n]], phi[obsNonZero[n]], pi_[obsNonZero[n]]);
}
for(n in 1:nZero){
target += tang_zi_zero_y_beta_lpdf(V0[n] | mu[obsZero[n]], phi[obsZero[n]], pi_[obsZero[n]], c);
}
// Priors
real lprior = 0;
lprior += normal_lupdf(b | 0,2);
lprior += normal_lupdf(Intercept | 0,2);
lprior += normal_lupdf(Intercept_phi | 0,2);
lprior += normal_lupdf(b_pi | 0,2);
// Informative prior on zero-inflation intercept to aid identifiability of zero sources
lprior += normal_lupdf(Intercept_pi | -5,0.5); // P(Y = 0) expected to be very low at covariate means
target += lprior;
}
Thanks for any advice,
Ciar