I coded a slightly more thorough simulation and updated the model code to reflect the correction above, and also to use logit links for the inflation parameters so that they can be modeled as well. In the simulation, the probabilities of choosing 0, 0.5, or 1 are influenced by treatment group. The code for simulating and plotting the data is below.
library(brms)
library(ggplot2)
###simulate zomib data
trt <- c(rep(0, 1000), rep(1, 1000))
n <- length(trt)
# probabilities on the natural scale: zoi = P(zero or one), coi = P(one | zero or one),
# mdi = P(0.5 | neither zero nor one); a_* is the trt=0 value, b_* the change for trt=1
a_zoi <- 0.25
b_zoi <- 0.2
a_coi <- 0.5
b_coi <- 0.3
a_mdi <- 0.1
b_mdi <- 0.3
a_b <- 0.5   # mean of the beta component for trt=0
b_b <- 0.25  # change in the beta mean for trt=1
zoi <- a_zoi + trt*b_zoi
coi <- a_coi + trt*b_coi
mdi <- a_mdi + trt*b_mdi
phi <- 2     # precision of the beta component
mu <- a_b + trt*b_b
shape1 <- mu * phi
shape2 <- (1 - mu) * phi
y <- vector("numeric", n)
# first decide zero/one vs continuous (zoi), then one vs zero (coi);
# otherwise decide midpoint vs a beta draw (mdi)
y <- ifelse(
rbinom(n, 1, zoi)==1,
rbinom(n, 1, coi),
ifelse( rbinom(n, 1, mdi)==1, 0.5, rbeta(n, shape1, shape2))
)
d1 <- cbind(trt,y)
d1 <- data.frame(d1)
d1$trt.f <- factor(d1$trt)
str(d1)
table((d1$y==0 | d1$y==1)[d1$trt==0])
table((d1$y==1)[d1$trt==0])
table((d1$y==0.5)[d1$trt==0])
hist(d1$y, breaks=100)
ggplot(d1, aes(y, fill = trt.f)) +
geom_histogram(alpha=0.75, position="identity", binwidth = 0.01) +
scale_fill_manual(values = c("#0275d8", "#f0ad4e")) +
theme(panel.grid = element_blank(),
panel.background = element_rect(fill = "white"),
axis.line = element_line(colour = "black"))
As can be seen in the histogram, for trt==1 there is an increased probability of neutral (0.5) and 1 responses, as well as a shift of the continuous responses toward higher values.
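The same pattern shows up in the raw proportions (a quick check):
###proportion of 0.5, 1, and 0/1 responses by treatment group
with(d1, tapply(y == 0.5, trt, mean))
with(d1, tapply(y == 1, trt, mean))
with(d1, tapply(y == 0 | y == 1, trt, mean))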
Now define the custom response distribution for a zero-one-mid-inflated beta model and run the model:
###define custom response distribution for zomib in brms
zero_one_mid_inflated_beta <- custom_family(
"zero_one_mid_inflated_beta", dpars = c("mu", "phi", "zoi", "coi", "mdi"),
links = c("logit", "identity", "logit", "logit", "logit"),
lb = c(0, 0, 0, 0, 0), ub = c(1, NA, 1, 1, 1),
type = "real"
)
stan_funs <- "
real zero_one_mid_inflated_beta_lpdf(real y, real mu, real phi,
real zoi, real coi, real mdi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
if (y == 0) {
return bernoulli_lpmf(1 | zoi) + bernoulli_lpmf(0 | coi);
} else if (y == 1) {
return bernoulli_lpmf(1 | zoi) + bernoulli_lpmf(1 | coi);
} else if (y == 0.5) {
return bernoulli_lpmf(1 | mdi) + bernoulli_lpmf(0 | zoi);
} else {
return bernoulli_lpmf(0 | zoi) + bernoulli_lpmf(0 | mdi) + beta_lpdf(y | shape[1], shape[2]);
}
}
"
stanvars <- stanvar(scode = stan_funs, block = "functions")
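For reference, the density these pieces implement is (with zoi, coi, and mdi on the probability scale; note the four branch weights sum to 1):
$$
f(y) =
\begin{cases}
\text{zoi}\,(1-\text{coi}) & y = 0 \\
\text{zoi}\,\text{coi} & y = 1 \\
(1-\text{zoi})\,\text{mdi} & y = 0.5 \\
(1-\text{zoi})\,(1-\text{mdi})\,\text{Beta}\big(y \mid \mu\phi,\ (1-\mu)\phi\big) & \text{otherwise.}
\end{cases}
$$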
###run zomib model
m.zomib <- brm(bf(y ~ trt.f, zoi ~ trt.f, coi ~ trt.f, mdi ~ trt.f),
data=d1, family = zero_one_mid_inflated_beta, stanvars = stanvars, cores = 4)
m.zomib
Now, define the posterior predict function for the custom family so that we can run pp_check():
###define posterior predict function in brms for posterior predictive checks. run pp_check
posterior_predict_zero_one_mid_inflated_beta <- function(i, prep, ...) {
zoi <- get_dpar(prep, "zoi", i)
coi <- get_dpar(prep, "coi", i)
mdi <- get_dpar(prep, "mdi", i)
mu <- get_dpar(prep, "mu", i = i)
phi <- get_dpar(prep, "phi", i = i)
# uniform draws used to decide which mixture component each predicted value comes from
hu <- runif(prep$ndraws, 0, 1)
one_or_zero <- runif(prep$ndraws, 0, 1)
mid <- runif(prep$ndraws, 0, 1)
ifelse(hu < zoi,
ifelse(one_or_zero < coi, 1, 0),
ifelse(mid < mdi & hu > zoi, 0.5, rbeta(prep$ndraws, shape1 = mu * phi, shape2 = (1 - mu) * phi))
)
}
pp_check(m.zomib, type='hist')
pp_check(m.zomib, ndraws=30)
Looks good. Now compare to a zero-one-inflated beta model for the same data:
###compare to zoib model
m.zoib <- brm(bf(y ~ trt.f, zoi ~ trt.f, coi ~ trt.f), data=d1, family = zero_one_inflated_beta, cores = 4)
m.zoib
pp_check(m.zoib, type='hist')
pp_check(m.zoib, ndraws=30)
As can be seen from the posterior predictive checks, the zoib model doesn’t capture the mid-point inflation, while the zomib model does.
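A quick numeric version of the same check (just a sketch that counts exact 0.5 values in the posterior predictive draws):
###proportion of exact 0.5 values: observed vs posterior predictive draws
yrep.zomib <- posterior_predict(m.zomib, ndraws = 100)
yrep.zoib <- posterior_predict(m.zoib, ndraws = 100)
mean(d1$y == 0.5)        # observed midpoint proportion
mean(yrep.zomib == 0.5)  # should be close to the observed value
mean(yrep.zoib == 0.5)   # essentially zero; the beta component never produces exactly 0.5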
The recovery of the simulation parameters can be checked for each model with the following code:
###compare recovery of parameters from the sim for the two models
##check to see if sim parameters were recovered by the zomib model
s.zomib <- as_draws_df(m.zomib)
#a_zoi, zero-one inflation for trt=0, was 0.25
a_zoi.zomib <- plogis(s.zomib$b_zoi_Intercept)
mean(a_zoi.zomib)
quantile(a_zoi.zomib, c(0.025, 0.975))
#b_zoi, change in zero-one inflation for trt=1 vs trt=0, was 0.2
b_zoi.zomib <- plogis(s.zomib$b_zoi_Intercept + s.zomib$b_zoi_trt.f1) - plogis(s.zomib$b_zoi_Intercept)
mean(b_zoi.zomib)
quantile(b_zoi.zomib, c(0.025, 0.975))
#a_coi, one inflation for trt=0, was 0.5
a_coi.zomib <- plogis(s.zomib$b_coi_Intercept)
mean(a_coi.zomib)
quantile(a_coi.zomib, c(0.025, 0.975))
#b_coi, change in one inflation for trt=1 vs trt=0, was 0.3
b_coi.zomib <- plogis(s.zomib$b_coi_Intercept + s.zomib$b_coi_trt.f1) - plogis(s.zomib$b_coi_Intercept)
mean(b_coi.zomib)
quantile(b_coi.zomib, c(0.025, 0.975))
#a_mdi, midpoint inflation conditional on not being a zero or one, for trt=0, was 0.1
a_mdi.zomib <- plogis(s.zomib$b_mdi_Intercept)
mean(a_mdi.zomib)
quantile(a_mdi.zomib, c(0.025, 0.975))
#b_mdi, change in midpoint inflation for trt=1 vs trt=0, was 0.3
b_mdi.zomib <- plogis(s.zomib$b_mdi_Intercept + s.zomib$b_mdi_trt.f1) - plogis(s.zomib$b_mdi_Intercept)
mean(b_mdi.zomib)
quantile(b_mdi.zomib, c(0.025, 0.975))
#a_b, mean for trt=0 for beta distribution, was 0.5
a_b.zomib <- plogis(s.zomib$b_Intercept)
mean(a_b.zomib)
quantile(a_b.zomib, c(0.025, 0.975))
#b_b, change in mean for beta distribution for trt=1 vs trt=0, was 0.25
b_b.zomib <- plogis(s.zomib$b_Intercept + s.zomib$b_trt.f1) - plogis(s.zomib$b_Intercept)
mean(b_b.zomib)
quantile(b_b.zomib, c(0.025, 0.975))
##compare sim parameters to those of the zoib model
s.zoib <- as_draws_df(m.zoib)
#a_zoi, zero-one inflation for trt=0, was 0.25
a_zoi.zoib <- plogis(s.zoib$b_zoi_Intercept)
mean(a_zoi.zoib)
quantile(a_zoi.zoib, c(0.025, 0.975))
#b_zoi, change in zero-one inflation for trt=1 vs trt=0, was 0.2
b_zoi.zoib <- plogis(s.zoib$b_zoi_Intercept + s.zoib$b_zoi_trt.f1) - plogis(s.zoib$b_zoi_Intercept)
mean(b_zoi.zoib)
quantile(b_zoi.zoib, c(0.025, 0.975))
#a_coi, one inflation for trt=0, was 0.5
a_coi.zoib <- plogis(s.zoib$b_coi_Intercept)
mean(a_coi.zoib)
quantile(a_coi.zoib, c(0.025, 0.975))
#b_coi, change in one inflation for trt=1 vs trt=0, was 0.3
b_coi.zoib <- plogis(s.zoib$b_coi_Intercept + s.zoib$b_coi_trt.f1) - plogis(s.zoib$b_coi_Intercept)
mean(b_coi.zoib)
quantile(b_coi.zoib, c(0.025, 0.975))
#a_b, mean for trt=0 for beta distribution, was 0.5
a_b.zoib <- plogis(s.zoib$b_Intercept)
mean(a_b.zoib)
quantile(a_b.zoib, c(0.025, 0.975))
#b_b, change in mean for beta distribution for trt=1 vs trt=0, was 0.25
b_b.zoib <- plogis(s.zoib$b_Intercept + s.zoib$b_trt.f1) - plogis(s.zoib$b_Intercept)
mean(b_b.zoib)
quantile(b_b.zoib, c(0.025, 0.975))
I will not post all of the output, but just note that the zomib model recovers all of the simulation parameters quite well. The zoib model recovers the parameters quite well (other than mid-point inflation, for which there is no parameter in the zoib model), except for the change in the beta-distribution mean for the trt==1 group: it estimates 0.19 (0.16 - 0.22) versus the programmed value of 0.25, while the zomib model recovers it.
So far so good. Here is where I get a little bit unsure.
Now, define posterior_epred for brms so that we can use conditional_effects().
posterior_epred_zero_one_mid_inflated_beta <- function(prep) {
with(prep$dpars, zoi * coi + (mdi * 0.5)*(1 - zoi) + mu * ( (1 - zoi) * (1 - mdi) ) )
}
conditional_effects(m.zomib)
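As a rough sanity check on the epred formula, plugging the true simulation values into it gives the marginal means that conditional_effects should approximately recover:
###E[y] = zoi*coi + (1 - zoi)*mdi*0.5 + (1 - zoi)*(1 - mdi)*mu, with the true sim values
0.25*0.50 + 0.75*0.1*0.5 + 0.75*0.9*0.50   # trt = 0: 0.50
0.45*0.80 + 0.55*0.4*0.5 + 0.55*0.6*0.75   # trt = 1: about 0.72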
Now let’s compare this to the zoib model.
conditional_effects(m.zoib)
Hmmm, they don’t look so different. Perhaps that is expected: the zoib model seems to simply average over the midpoint inflation, so maybe it isn’t surprising that the predictions are so similar.
Now try to define the log-likelihood for the zomib model, so that we can compare the two models via LOO.
###define log likelihood in brms to use with LOO. run loo()
log_lik_weight <- function(x, i, prep) {
weight <- prep$data$weights[i]
if (!is.null(weight)) {
x <- x * weight
}
x
}
log_lik_zero_one_mid_inflated_beta <- function(i, prep) {
zoi <- get_dpar(prep, "zoi", i)
coi <- get_dpar(prep, "coi", i)
mdi <- get_dpar(prep, "mdi", i)
if (prep$data$Y[i] %in% c(0, 1)) {
out <- dbinom(1, size = 1, prob = zoi, log = TRUE) +
dbinom(prep$data$Y[i], size = 1, prob = coi, log = TRUE)
} else if (prep$data$Y[i] %in% c(0.5)) {
out <- dbinom(1, size = 1, prob = mdi, log = TRUE) +
dbinom(0, size = 1, prob = zoi, log = TRUE)
} else {
phi <- get_dpar(prep, "phi", i)
mu <- get_dpar(prep, "mu", i)
out <- dbinom(0, size = 1, prob = zoi, log = TRUE) +
dbinom(0, size = 1, prob = mdi, log = TRUE) +
dbeta(prep$data$Y[i], shape1 = mu * phi, shape2 = (1 - mu) * phi, log = TRUE)
}
log_lik_weight(out, i = i, prep = prep)
}
loo(m.zomib, m.zoib, cores=1)
Now that does seem a bit strange. According to LOO, the zomib model is much worse, but as we saw above, the posterior predictive plots and the recovery of the simulation parameters indicated that it was much better (in addition, it is the generative model for these data).
It makes me think that I have made an error in the log-likelihood code (and maybe also in the posterior_epred code, since I would have expected to see more of a difference between treatment groups there)? @avehtari @paul.buerkner anyone else? @Solomon
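One thing I may try in the meantime, to see where the difference comes from, is to compare the mean pointwise log-likelihoods of the two models by response type (just a sketch using log_lik()):
###mean pointwise log-likelihood difference (zomib minus zoib) by response type
ll.zomib <- log_lik(m.zomib)  # draws x observations; uses the custom log_lik above
ll.zoib <- log_lik(m.zoib)
type <- ifelse(d1$y %in% c(0, 1), "zero/one", ifelse(d1$y == 0.5, "mid", "continuous"))
tapply(colMeans(ll.zomib) - colMeans(ll.zoib), type, mean)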