I guess I would have to see the model to know if I could offer any other suggestions. If you have a model for the number of hospitalized patients but you don’t know that number for every hospital, then it seems you could use the missing-data approach in brms. I’m thinking something like (very simplified):
bf(patient_pop | mi() ~ 1 + mi(number_beds) + (1|p|hospital)) + poisson()
bf(number_beds | mi() ~ 1 + (1|p|hospital)) + poisson()
bf(hai ~ 1 + log(mi(patient_pop)) + x + (1|p|hospital)) + poisson()
That is, if you have some information on beds and patient population for some but not all hospitals (though I may not be understanding what you are trying to get at).
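Just to show how those pieces would fit together, the call to brm() would look something like this (a rough sketch, assuming a data frame d with columns hai, patient_pop, number_beds, x, and hospital, with NAs where values are unknown; as I said, very simplified, and you would want to check how brms handles mi() with count responses):
library(brms)
f1 <- bf(patient_pop | mi() ~ 1 + mi(number_beds) + (1|p|hospital)) + poisson()
f2 <- bf(number_beds | mi() ~ 1 + (1|p|hospital)) + poisson()
f3 <- bf(hai ~ 1 + log(mi(patient_pop)) + x + (1|p|hospital)) + poisson()
fit <- brm(f1 + f2 + f3, data = d, chains = 4, cores = 4) #d is a hypothetical data frame with NAs for the missing values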
Regarding the joint model, the likelihood part looked like this:
for (n in 1:N) {
  target += bernoulli_logit_lpmf(Y[n] | a + r_1_1[J_1[n]] + r_2_1[J_2[n]]); // model 1
}
for (n_2 in 1:N_2) {
  target += bernoulli_logit_lpmf(y_c[n_2] | a_b + inv_logit(a + r_1_1[K_1[n_2]] + r_2_1[K_2[n_2]]) * b); // model 2
}
where Y and y_c were different binary outcomes, J_1 and J_2 were id’s for nested varying intercepts, and K_1 and K_2 were the corresponding id’s for model 2. Notice that the estimated probability for each level from model 1, the inv_logit(a + r_1_1[K_1[n_2]] + r_2_1[K_2[n_2]]) part, became the data (so to speak) as a predictor in model 2 (the parameter b is the coefficient for the effect of inv_logit(a + r_1_1[K_1[n_2]] + r_2_1[K_2[n_2]]) on the outcome y_c).
I hope I got that all right…it was a while ago that I wrote it. It’s not quite the case you are asking about, but I thought I would mention it as an example of using parameters from one model as predictors in another model. The full data simulation and code are below (note that it could take a long time to run, depending on how many groups you simulate and how large they are). Let me know if you spot any errors, but I think it works.
library(rstan)
library(bayesplot)
#data
N_1 <- 50 #number of sites
N_t <- 10 #number of timepoints per site
N_2 <- N_t*N_1 #total number of timepoint by site combinations
N_3 <- 30 #number of observations per timepoint within sites
N <- N_2*N_3 #number of observations in model 1
J_1 <- rep(1:N_1, each=(N/N_1)) #site id model 1
J_2 <- rep(1:N_t, each=((N/N_1)/N_t), times=N_1) #timepoint id (to use with brms if needed)
J_2s <- rep(1:N_2, each=N_3) #timepoint id for sim (this is an id for all site by timepoint combinations)
a <- rep(-2, times=N) #model 1 intercept
a_1 <- rnorm(N_1, 0, 1.5) #varying intercepts for site
a_2 <- rnorm(N_2, 0, 0.75) #varying intercepts for timepoint within site
p <- plogis(a + a_1[J_1] + a_2[J_2s]) #model 1
Y <- rbinom(N, 1, p=p) #model 1
K_1 <- rep(1:N_1, each=N_t) #site id model 2
K_2 <- 1:N_2 #site by timepoint combination id for model 2
a_b <- rep(-3, times=N_2) #model 2 intercept
b <- rep(3, times=N_2) #model 2 effect of site-timepoint proportion on outcome
p_c <- plogis(a_b + (plogis(a[1:N_2] + a_1[K_1] + a_2[K_2]))*b) #model 2
y_c <- rbinom(N_2, 1, p=p_c) #model 2
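#optional quick checks on the simulated data before fitting (not strictly needed)
mean(Y) #overall event rate for model 1
mean(y_c) #overall event rate for model 2
table(J_1)[1:5] #observations per site for model 1 (should be N_t*N_3 = 300 each)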
#stan data
d1 <- list(N = N,      #total number of observations in the model 1 data
           Y = Y,      #binary response for model 1
           N_1 = N_1,  #number of sites
           J_1 = J_1,  #site id for model 1
           N_2 = N_2,  #number of site by timepoint combinations for model 1 data
           J_2 = J_2s, #site by timepoint combination id for model 1 data
           y_c = y_c,  #binary response for model 2
           K_1 = K_1,  #site id for model 2
           K_2 = K_2   #site by timepoint combination id for model 2 data
           )
#stan code
stan_code1 <- "
data {
  int<lower=1> N; // total number of observations
  int Y[N]; // response variable for model 1
  // data for group-level effects of site
  int<lower=1> N_1; // number of grouping levels
  int<lower=1> J_1[N]; // grouping indicator per observation, site level id for model 1
  // data for group-level effects of site-timepoint combination
  int<lower=1> N_2; // number of grouping levels NOTE THAT THIS IS THE SAME LENGTH AS THE MODEL 2 DATASET
  int<lower=1> J_2[N]; // grouping indicator per observation, site by timepoint combination id for model 1
  // model 2 data
  int y_c[N_2]; // model 2 outcome
  int<lower=1> K_1[N_2]; // grouping indicator per observation, site level id for model 2
  int<lower=1> K_2[N_2]; // grouping indicator per observation, site-timepoint id for model 2 (note, K_2 is the same length as N_2)
}
parameters {
  real a; // intercept for model 1
  real<lower=0> sd_1; // group-level standard deviation for site
  vector[N_1] z_1; // standardized group-level effects for site, needed for the non-centered parameterization
  real<lower=0> sd_2; // group-level standard deviation for site-timepoint combination
  vector[N_2] z_2; // standardized group-level effects for site-timepoint combination, needed for the non-centered parameterization
  real a_b; // intercept for model 2
  real b; // effect of the model 1 site-timepoint probability estimate on the model 2 outcome
}
transformed parameters {
  vector[N_1] r_1_1; // actual group-level effects for site
  vector[N_2] r_2_1; // actual group-level effects for site-timepoint combination
  r_1_1 = (sd_1 * z_1); // non-centered parameterization, group-level effect for site
  r_2_1 = (sd_2 * z_2); // non-centered parameterization, group-level effect for site-timepoint combination
}
model {
  for (n in 1:N) {
    target += bernoulli_logit_lpmf(Y[n] | a + r_1_1[J_1[n]] + r_2_1[J_2[n]]); // model 1
  }
  for (n_2 in 1:N_2) {
    target += bernoulli_logit_lpmf(y_c[n_2] | a_b + inv_logit(a + r_1_1[K_1[n_2]] + r_2_1[K_2[n_2]]) * b); // model 2
  }
  // priors
  target += normal_lpdf(a | 0, 5);
  target += normal_lpdf(sd_1 | 0, 2.5)
    - 1 * normal_lccdf(0 | 0, 2.5);
  target += std_normal_lpdf(z_1);
  target += normal_lpdf(sd_2 | 0, 2.5)
    - 1 * normal_lccdf(0 | 0, 2.5);
  target += std_normal_lpdf(z_2);
  target += normal_lpdf(a_b | -5, 2.5);
  target += normal_lpdf(b | 0, 2.5);
}
generated quantities {
  vector[N] p_m1_pred;
  vector[N_2] p_m2_pred;
  real b_or;
  for (n in 1:N) {
    p_m1_pred[n] = bernoulli_logit_rng(a + r_1_1[J_1[n]] + r_2_1[J_2[n]]); // model 1 posterior predictive distribution
  }
  for (n_2 in 1:N_2) {
    p_m2_pred[n_2] = bernoulli_logit_rng(a_b + inv_logit(a + r_1_1[K_1[n_2]] + r_2_1[K_2[n_2]]) * b); // model 2 posterior predictive distribution
  }
  b_or = exp(b); // odds ratio for b from model 2
}
"
#fit model
fit1 <- stan(model_code = stan_code1, data = d1,
             chains = 4, iter = 2000, warmup = 1000,
             thin = 1, cores = 4)
print(fit1, pars=c("sd_1", "sd_2", "a", "a_b", "b", "b_or"))
#posterior predictive checks
yrep_m1 <- extract(fit1)$p_m1_pred
yrep_m2 <- extract(fit1)$p_m2_pred
ppc_dens_overlay(Y, yrep = yrep_m1[1:30, ]) #density plot for model 1
ppc_bars(Y, yrep = yrep_m1[1:30, ]) #bars plot for model 1
ppc_dens_overlay(y_c, yrep = yrep_m2[1:30, ]) #for model 2
ppc_bars(y_c, yrep = yrep_m2[1:30, ]) #bars plot for model 2
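#optional: rough check of parameter recovery against the values used in the simulation above
#(a = -2, a_b = -3, b = 3, sd_1 = 1.5, sd_2 = 0.75)
post <- as.matrix(fit1, pars = c("a", "a_b", "b", "sd_1", "sd_2"))
truth <- c(a = -2, a_b = -3, b = 3, sd_1 = 1.5, sd_2 = 0.75)
round(cbind(true = truth,
            mean = colMeans(post)[names(truth)],
            t(apply(post[, names(truth)], 2, quantile, probs = c(0.025, 0.975)))), 2)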