Hello! I want to fit a multivariate non-linear model with shared parameters. I am used to work with brms and I tried specifying the model in brms in a way that express what I want it to do:
# make inverse logit link function
inv_logit <- function(x) 1 / (1 + exp(-x))
#specify the model
joint_mod1 <- mvbf(Nmr |
se(Nerr, sigma = TRUE) ~ (concm3 * (Q + (invlogit(k) * V))) / (exp(logI0) * ft * (
MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2))),
BI |
se(BIerr, sigma = TRUE) ~ ((Q + invlogit(k) * V) * concm3 * MG) / (exp(logI0) *ft * (MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2))),
logI0 ~ 1,
b ~ 1,
k ~ 1 + (1 | site),
nlf(sigma ~ int + concm3 ^ a),
int ~ 1,
a ~ 1,
nl = T,
family = gaussian()
)
prior <- c(
prior(normal(10,2), nlpar = "logI0", lb = 0),
prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1),
prior(normal(-3,1.5), nlpar = "k", lb = 0),
prior(exponential(4), class = "sd", nlpar = "k"),
prior(normal(0,0.5), class = "b", nlpar = "a", lb = 0),
prior(normal(0,0.5), class = "b", nlpar = "int", lb = 0)
)
When I try to do anyting with the formula specified above by example get_prior(joint_mod1, data=data)
, I get the error:
Error in formula.default(object, env = baseenv()) : 'formula' incorrecte
From what I understood from this post Joint Modelling in brms this model can not be fitted using brms. Is this right?
My alternative idea is to write the model as if the parameters were not shared in brms and get the stan code with the mak_stancode()
and make_standata()
functions. To do this I specified my r code was:
mod_joint <-
bf(
Nmr |
se(Nerr, sigma = TRUE) ~ (concm3 * (Q + (invlogit(
k
) * V))) / (exp(logI0) * ft * (
MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2)
)),
logI0 ~ 1,
b ~ 1,
k ~ 1 + (1 | site),
nlf(sigma ~ int + concm3 ^ a),
int ~ 1,
a ~ 1,
nl = T,
family = gaussian()
) +
bf(
BI |
se(BIerr, sigma = TRUE) ~ ((Q + invlogit(k) * V) * concm3 * MG) / (exp(logI0) *
ft * (
MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2)
)),
logI0 ~ 1,
b ~ 1,
k ~ 1 + (1 | site),
nlf(sigma ~ int + concm3 ^ a),
int ~ 1,
a ~ 1,
nl = T,
family = gaussian()
)
get_prior(mod_joint, data = data)
prior_stan_code <- c(
prior(normal(10,2), nlpar = "logI0", lb = 0, resp = "BI"),
prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1, resp = "BI"),
prior(normal(-3,1.5), nlpar = "k", lb = 0, resp = "BI"),
prior(exponential(3), class = "sd", nlpar = "k", resp = "BI"),
prior(normal(0,1), class = "b", nlpar = "a", lb = 0, resp = "BI"),
prior(normal(0,1), class = "b", nlpar = "int", lb = 0, resp = "BI"),
prior(normal(10,2), nlpar = "logI0", lb = 0, resp = "Nmr"),
prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1, resp = "Nmr"),
prior(normal(-3,1.5), nlpar = "k", lb = 0, resp = "Nmr"),
prior(exponential(3), class = "sd", nlpar = "k", resp = "Nmr"),
prior(normal(0,1), class = "b", nlpar = "a", lb = 0, resp = "Nmr"),
prior(normal(0,1), class = "b", nlpar = "int", lb = 0, resp = "Nmr")
)
make_stancode(mod_joint, data = data, prior = prior_stan_code)
## make stan data
stan_data <- make_standata(mod_joint, data = data, prior = prior_stan_code)
Here is an example of data:
structure(list(site = c("site1", "site1", "site1", "site1", "site2",
"site2", "site2", "site2", "site3", "site3", "site3", "site3"
), Nmr = c(3173L, 2966L, 1262L, 1149L, 504L, 453L, 777L, 704L,
1533L, 372L, 896L, 193L), Nerr = c(336.989795918367, 336.989795918367,
167.602040816327, 167.602040816327, 87.7551020408163, 87.7551020408163,
146.428571428571, 146.428571428571, 163.520408163265, 163.520408163265,
139.030612244898, 131.377551020408), concm3 = c(5148.1, 3723.06869230388,
1917.81995681897, 1694.86864942529, 2458.9, 473.748600012, 2370.455533366,
1104.59457333333, 1543.6, 4244.82828278788, 1939.86065651591,
632.783535353535), Q = c(44606.1973266835, 7148.80605247308,
40475.0499899619, 14328.9779815932, 1179.41546274407, 311.338271604938,
1011.65268657152, 343.723340046114, 3858.32694159881, 849.379914193153,
3493.00224732218, 1107.93016373347), V = c(319346.983077122,
319346.983077122, 319346.983077122, 319346.983077122, 56377.3469017162,
142137.4817856, 142137.4817856, 142137.4817856, 156332.500004466,
60228.1080636982, 60228.1080636982, 60228.1080636982), ft = c(0.964098761243345,
0.915984845013819, 0.957674570493223, 0.900846934099921, 0.861315579722472,
0.891627183842253, 0.750831703960004, 0.900846934099921, 0.48341486108144,
0.624795727445052, 0.502013926753242, 0.620807014871961), MG = c(290.378151260503,
289.866660225131, 386.116946778711, 385.555832813475, 108.302240896359,
108.449711723254, 142.233659491194, 142.732474964235, 79.6770744225835,
90.1723634238972, 96.9457831325302, 74.3433696348494), mgVar = c(7167.07851532402,
7083.85137352115, 4429.97998155698, 4310.38614224204, 1366.25000784314,
1255.20431299441, 563.231373668189, 586.130001516698, 404.951071351273,
411.166985148681, 661.822873934763, 404.439865874843), BI = c(586326.283422458,
547110.145417652, 310087.009803921, 281911.414847162, 34735.4823529412,
31263.0941704036, 70328.0794520548, 63944.1487839771, 77728.6077844312,
21346.2576687117, 55276.7228915663, 9130.71748878923), BIerr = c(62271.0288660917,
62161.3406099672, 41181.6289015605, 41121.7828179307, 6048.04721888756,
6056.28260272719, 13253.5909980431, 13300.0715307582, 8291.07219235,
9383.19560953216, 8577.18373493977, 6215.39535096549)), class = "data.frame", row.names = c(NA,
-12L))
The stan code produced is :
// generated with brms 2.21.0
functions {
}
data {
int<lower=1> N; // total number of observations
int<lower=1> N_Nmr; // number of observations
vector[N_Nmr] Y_Nmr; // response variable
vector<lower=0>[N_Nmr] se_Nmr; // known sampling error
int<lower=1> K_Nmr_logI0; // number of population-level effects
matrix[N_Nmr, K_Nmr_logI0] X_Nmr_logI0; // population-level design matrix
int<lower=1> K_Nmr_b; // number of population-level effects
matrix[N_Nmr, K_Nmr_b] X_Nmr_b; // population-level design matrix
int<lower=1> K_Nmr_k; // number of population-level effects
matrix[N_Nmr, K_Nmr_k] X_Nmr_k; // population-level design matrix
int<lower=1> K_Nmr_int; // number of population-level effects
matrix[N_Nmr, K_Nmr_int] X_Nmr_int; // population-level design matrix
int<lower=1> K_Nmr_a; // number of population-level effects
matrix[N_Nmr, K_Nmr_a] X_Nmr_a; // population-level design matrix
// covariates for non-linear functions
vector[N_Nmr] C_Nmr_1;
vector[N_Nmr] C_Nmr_2;
vector[N_Nmr] C_Nmr_3;
vector[N_Nmr] C_Nmr_4;
vector[N_Nmr] C_Nmr_5;
vector[N_Nmr] C_Nmr_6;
// covariates for non-linear functions
vector[N_Nmr] C_sigma_Nmr_1;
int<lower=1> N_BI; // number of observations
vector[N_BI] Y_BI; // response variable
vector<lower=0>[N_BI] se_BI; // known sampling error
int<lower=1> K_BI_logI0; // number of population-level effects
matrix[N_BI, K_BI_logI0] X_BI_logI0; // population-level design matrix
int<lower=1> K_BI_b; // number of population-level effects
matrix[N_BI, K_BI_b] X_BI_b; // population-level design matrix
int<lower=1> K_BI_k; // number of population-level effects
matrix[N_BI, K_BI_k] X_BI_k; // population-level design matrix
int<lower=1> K_BI_int; // number of population-level effects
matrix[N_BI, K_BI_int] X_BI_int; // population-level design matrix
int<lower=1> K_BI_a; // number of population-level effects
matrix[N_BI, K_BI_a] X_BI_a; // population-level design matrix
// covariates for non-linear functions
vector[N_BI] C_BI_1;
vector[N_BI] C_BI_2;
vector[N_BI] C_BI_3;
vector[N_BI] C_BI_4;
vector[N_BI] C_BI_5;
vector[N_BI] C_BI_6;
// covariates for non-linear functions
vector[N_BI] C_sigma_BI_1;
int<lower=1> nresp; // number of responses
int nrescor; // number of residual correlations
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
array[N_Nmr] int<lower=1> J_1_Nmr; // grouping indicator per observation
// group-level predictor values
vector[N_Nmr] Z_1_Nmr_k_1;
// data for group-level effects of ID 2
int<lower=1> N_2; // number of grouping levels
int<lower=1> M_2; // number of coefficients per level
array[N_BI] int<lower=1> J_2_BI; // grouping indicator per observation
// group-level predictor values
vector[N_BI] Z_2_BI_k_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
vector<lower=0>[N_Nmr] se2_Nmr = square(se_Nmr);
vector<lower=0>[N_BI] se2_BI = square(se_BI);
array[N] vector[nresp] Y; // response array
for (n in 1:N) {
Y[n] = transpose([Y_Nmr[n], Y_BI[n]]);
}
}
parameters {
vector<lower=0>[K_Nmr_logI0] b_Nmr_logI0; // regression coefficients
vector<lower=0,upper=1>[K_Nmr_b] b_Nmr_b; // regression coefficients
vector<lower=0>[K_Nmr_k] b_Nmr_k; // regression coefficients
vector<lower=0>[K_Nmr_int] b_Nmr_int; // regression coefficients
vector<lower=0>[K_Nmr_a] b_Nmr_a; // regression coefficients
vector<lower=0>[K_BI_logI0] b_BI_logI0; // regression coefficients
vector<lower=0,upper=1>[K_BI_b] b_BI_b; // regression coefficients
vector<lower=0>[K_BI_k] b_BI_k; // regression coefficients
vector<lower=0>[K_BI_int] b_BI_int; // regression coefficients
vector<lower=0>[K_BI_a] b_BI_a; // regression coefficients
cholesky_factor_corr[nresp] Lrescor; // parameters for multivariate linear models
vector<lower=0>[M_1] sd_1; // group-level standard deviations
array[M_1] vector[N_1] z_1; // standardized group-level effects
vector<lower=0>[M_2] sd_2; // group-level standard deviations
array[M_2] vector[N_2] z_2; // standardized group-level effects
}
transformed parameters {
vector[N_1] r_1_Nmr_k_1; // actual group-level effects
vector[N_2] r_2_BI_k_1; // actual group-level effects
real lprior = 0; // prior contributions to the log posterior
r_1_Nmr_k_1 = (sd_1[1] * (z_1[1]));
r_2_BI_k_1 = (sd_2[1] * (z_2[1]));
lprior += normal_lpdf(b_Nmr_logI0 | 10, 2)
- 1 * normal_lccdf(0 | 10, 2);
lprior += beta_lpdf(b_Nmr_b | 0.7 * 4,(1 - 0.7) * 4);
lprior += normal_lpdf(b_Nmr_k | -3, 1.5)
- 1 * normal_lccdf(0 | -3, 1.5);
lprior += normal_lpdf(b_Nmr_int | 0, 1)
- 1 * normal_lccdf(0 | 0, 1);
lprior += normal_lpdf(b_Nmr_a | 0, 1)
- 1 * normal_lccdf(0 | 0, 1);
lprior += normal_lpdf(b_BI_logI0 | 10, 2)
- 1 * normal_lccdf(0 | 10, 2);
lprior += beta_lpdf(b_BI_b | 0.7 * 4,(1 - 0.7) * 4);
lprior += normal_lpdf(b_BI_k | -3, 1.5)
- 1 * normal_lccdf(0 | -3, 1.5);
lprior += normal_lpdf(b_BI_int | 0, 1)
- 1 * normal_lccdf(0 | 0, 1);
lprior += normal_lpdf(b_BI_a | 0, 1)
- 1 * normal_lccdf(0 | 0, 1);
lprior += lkj_corr_cholesky_lpdf(Lrescor | 1);
lprior += exponential_lpdf(sd_1 | 3);
lprior += exponential_lpdf(sd_2 | 3);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N_Nmr] nlp_Nmr_logI0 = rep_vector(0.0, N_Nmr);
// initialize linear predictor term
vector[N_Nmr] nlp_Nmr_b = rep_vector(0.0, N_Nmr);
// initialize linear predictor term
vector[N_Nmr] nlp_Nmr_k = rep_vector(0.0, N_Nmr);
// initialize linear predictor term
vector[N_Nmr] nlp_Nmr_int = rep_vector(0.0, N_Nmr);
// initialize linear predictor term
vector[N_Nmr] nlp_Nmr_a = rep_vector(0.0, N_Nmr);
// initialize non-linear predictor term
vector[N_Nmr] mu_Nmr;
// initialize non-linear predictor term
vector[N_Nmr] sigma_Nmr;
// initialize linear predictor term
vector[N_BI] nlp_BI_logI0 = rep_vector(0.0, N_BI);
// initialize linear predictor term
vector[N_BI] nlp_BI_b = rep_vector(0.0, N_BI);
// initialize linear predictor term
vector[N_BI] nlp_BI_k = rep_vector(0.0, N_BI);
// initialize linear predictor term
vector[N_BI] nlp_BI_int = rep_vector(0.0, N_BI);
// initialize linear predictor term
vector[N_BI] nlp_BI_a = rep_vector(0.0, N_BI);
// initialize non-linear predictor term
vector[N_BI] mu_BI;
// initialize non-linear predictor term
vector[N_BI] sigma_BI;
// multivariate predictor array
array[N] vector[nresp] Mu;
array[N] vector[nresp] sigma;
// cholesky factor of residual covariance matrix
array[N] matrix[nresp, nresp] LSigma;
nlp_Nmr_logI0 += X_Nmr_logI0 * b_Nmr_logI0;
nlp_Nmr_b += X_Nmr_b * b_Nmr_b;
nlp_Nmr_k += X_Nmr_k * b_Nmr_k;
nlp_Nmr_int += X_Nmr_int * b_Nmr_int;
nlp_Nmr_a += X_Nmr_a * b_Nmr_a;
nlp_BI_logI0 += X_BI_logI0 * b_BI_logI0;
nlp_BI_b += X_BI_b * b_BI_b;
nlp_BI_k += X_BI_k * b_BI_k;
nlp_BI_int += X_BI_int * b_BI_int;
nlp_BI_a += X_BI_a * b_BI_a;
for (n in 1:N_Nmr) {
// add more terms to the linear predictor
nlp_Nmr_k[n] += r_1_Nmr_k_1[J_1_Nmr[n]] * Z_1_Nmr_k_1[n];
}
for (n in 1:N_BI) {
// add more terms to the linear predictor
nlp_BI_k[n] += r_2_BI_k_1[J_2_BI[n]] * Z_2_BI_k_1[n];
}
for (n in 1:N_Nmr) {
// compute non-linear predictor values
mu_Nmr[n] = ((C_Nmr_1[n] * (C_Nmr_2[n] + (invlogit(nlp_Nmr_k[n]) * C_Nmr_3[n]))) / (exp(nlp_Nmr_logI0[n]) * C_Nmr_4[n] * (C_Nmr_5[n] ^ nlp_Nmr_b[n] + (C_Nmr_6[n] / 2) * nlp_Nmr_b[n] * (nlp_Nmr_b[n] - 1) * C_Nmr_5[n] ^ (nlp_Nmr_b[n] - 2))));
}
for (n in 1:N_Nmr) {
// compute non-linear predictor values
sigma_Nmr[n] = exp(nlp_Nmr_int[n] + C_sigma_Nmr_1[n] ^ nlp_Nmr_a[n]);
}
for (n in 1:N_BI) {
// compute non-linear predictor values
mu_BI[n] = (((C_BI_1[n] + invlogit(nlp_BI_k[n]) * C_BI_2[n]) * C_BI_3[n] * C_BI_4[n]) / (exp(nlp_BI_logI0[n]) * C_BI_5[n] * (C_BI_4[n] ^ nlp_BI_b[n] + (C_BI_6[n] / 2) * nlp_BI_b[n] * (nlp_BI_b[n] - 1) * C_BI_4[n] ^ (nlp_BI_b[n] - 2))));
}
for (n in 1:N_BI) {
// compute non-linear predictor values
sigma_BI[n] = exp(nlp_BI_int[n] + C_sigma_BI_1[n] ^ nlp_BI_a[n]);
}
// combine univariate parameters
for (n in 1:N) {
Mu[n] = transpose([mu_Nmr[n], mu_BI[n]]);
sigma[n] = transpose([sqrt(square(sigma_Nmr[n]) + se2_Nmr[n]), sqrt(square(sigma_BI[n]) + se2_BI[n])]);
LSigma[n] = diag_pre_multiply(sigma[n], Lrescor);
}
for (n in 1:N) {
target += multi_normal_cholesky_lpdf(Y[n] | Mu[n], LSigma[n]);
}
}
// priors including constants
target += lprior;
target += std_normal_lpdf(z_1[1]);
target += std_normal_lpdf(z_2[1]);
}
generated quantities {
// residual correlations
corr_matrix[nresp] Rescor = multiply_lower_tri_self_transpose(Lrescor);
vector<lower=-1,upper=1>[nrescor] rescor;
// extract upper diagonal of correlation matrix
for (k in 1:nresp) {
for (j in 1:(k - 1)) {
rescor[choose(k - 1, 2) + j] = Rescor[j, k];
}
}
}
How can I specify that the parameters I0, b, and k are shared in the stan model? I also want the k estimated for each site with the random effect to be the same between the two sub-models.
Thank you!