Hello.
I am fitting a beta-Binomial model to describe a probability of success p given n successes from N trials as a function of explanatory variable sets X and Z describing the mean mu and variance phi of a beta distribution as follows (in JAGS):
for (i in 1:nobs) {
n[i] ~ dbinom(p[i], N[i])
p[i] ~ dbeta(alpha[i], beta[i])
alpha[i] <- mu[i] * phi[i]
beta[i] <- (1 - mu[i]) * phi[i]
mu[i] <- atan(mu_[i]) / pi + (1/2)
mu_[i] <- mu_a_mn + mu_a_re[group[i]] + inprod(X[i,], mu_b)
log(phi[i]) <- phi_[i]
phi_[i] <- phi_a_mn + phi_a_re[group[i]] + inprod(Z[i, ], phi_b)
}
There are terms to allow for group-level effects in mu and phi and mu is Cauchit-transformed rather than the conventional logit-transformed to allow more flexibility to fit the perceived highly variable p.
Simulations suggest I can recover ok generating parameter estimates when fit in brms using the beta_binomial2
custom family (Define Custom Response Distributions with brms) as follows (in R):
# data structure
nyears <- 25
ngroups <- 9
nobs <- nyears * ngroups
nx <- 3
nz <- 1
# parameters
## mu
mu_a_mn <- 2
mu_a_sd <- 1
mu_a_re <- rnorm(ngroups, 0, mu_a_sd)
mu_b <- c(1.11, -1.56, 1.2)
## phi
phi_a_mn <- 3
phi_a_sd <- 2
phi_a_re <- rnorm(ngroups, 0, phi_a_sd)
phi_b <- -1.5
# simulate data
groups <- rep(1:ngroups, each = nyears)
years <- rep(1:nyears, times = ngroups)
## mu
X <- sapply(rep(0, nx), rnorm, n = nobs, sd = 1)
mu_ <- mu_a_mn + mu_a_re[groups] + X %*% mu_b
mu <- as.numeric(atan(mu_) / pi + (1/2))
## phi
Z <- sapply(rep(0, nz), rnorm, n = nobs, sd = 1)
phi_ <- phi_a_mn + phi_a_re[groups] + Z %*% phi_b
phi <- exp(as.numeric(phi_))
## Beta-distributed probability of success (p)
alpha <- mu * phi
beta <- (1 - mu) * phi
p <- rbeta(nobs, alpha, beta)
## trial sizes (N)
Nhat <- sample(x = seq(50, 2000, 25), size = ngroups)
Nmat <- sapply(1:ngroups, function(v) rpois(nyears, Nhat[v]))
N <- as.numeric(Nmat)
## number of successes (n)
n <- rbinom(nobs, N, p)
# fit using brms
library(brms)
## store data in data frame
d <- data.frame(n, N, p, X, Z, groups, years)
## custom family: beta binomial mean and var with cauchit mu link
beta_binomial2 <- custom_family('beta_binomial2',
dpars = c('mu', 'phi'),
links = c('cauchit', 'log'),
lb = c(NA, 0),
type = 'int', vars = 'vint1[n]')
## corresponding *lpmf and *rng based on beta_binomial in stan
stan_funs <- '
real beta_binomial2_lpmf(int y, real mu, real phi, int T) {
return beta_binomial_lpmf(y | T, mu * phi, (1 - mu) * phi);
}
int beta_binomial2_rng(real mu, real phi, int T) {
return beta_binomial_rng(T, mu * phi, (1 - mu) * phi);
}'
## put the stan functions in the functions block and add trials
stanvars <- stanvar(scode = stan_funs, block = 'functions')
## fit
brm_fit <- brm(bf(n | vint(N) ~ 1 + X1 + X2 + X3 + (1|groups),
phi ~ 1 + Z + (1|groups)),
data = d,
family = beta_binomial2,
stanvars = stanvars,
chains = 3, cores = 3)
## print a summary
print(summary(brm_fit))
Now, I would to use this model to predict p for new groups where I do not have N (I do have n), i.e.:
# prediction functions
posterior_predict_beta_binomial2 <- function(i, prep, ...) {
mu <- prep$dpars$mu[, i]
phi <- prep$dpars$phi[, i]
trials <- prep$data$vint1[i]
beta_binomial2_rng(mu, phi, trials)
}
# predict to new groups without N
d1 <- d
d1$groups <- factor(as.integer(d$groups) + ngroups)
d1$N <- NA
predict(brm_fit, newdata = d1, allow_new_levels = TRUE)
# Error: 'vint' requires whole numbers as input.
Of course it breaks because it is predicting the Binomial part too and it is looking for integers in N and not the NA
s I have given.
Is it possible to predict the beta part only using brms
?
If not, is there any reason why I could not draw samples from the relevant posteriors to generate predictions manually?
Many thanks for any guidance,
Stephen