Fit multivariate non-linear model with shared parameters

Hello! I want to fit a multivariate non-linear model with shared parameters. I am used to work with brms and I tried specifying the model in brms in a way that express what I want it to do:

# make inverse logit link function 
inv_logit <- function(x) 1 / (1 + exp(-x))

#specify the model
joint_mod1 <- mvbf(Nmr |
                     se(Nerr, sigma = TRUE) ~ (concm3 * (Q + (invlogit(k) * V))) / (exp(logI0) * ft * (
                       MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2))),
                   BI |
                     se(BIerr, sigma = TRUE) ~ ((Q + invlogit(k) * V) * concm3 * MG) / (exp(logI0) *ft * (MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2))),
                   logI0 ~ 1,
                   b ~ 1,
                   k ~ 1 + (1 | site),
                   nlf(sigma ~ int + concm3 ^ a),
                   int ~ 1,
                   a ~ 1,
                   nl = T,
                   family = gaussian()
                   )

prior <- c(
  prior(normal(10,2), nlpar = "logI0", lb = 0), 
  prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1),
  prior(normal(-3,1.5), nlpar = "k", lb = 0),
  prior(exponential(4), class = "sd", nlpar = "k"),
  prior(normal(0,0.5), class = "b", nlpar = "a", lb = 0),
  prior(normal(0,0.5), class = "b", nlpar = "int", lb = 0)
)

When I try to do anyting with the formula specified above by example get_prior(joint_mod1, data=data), I get the error:

Error in formula.default(object, env = baseenv()) : 'formula' incorrecte

From what I understood from this post Joint Modelling in brms this model can not be fitted using brms. Is this right?

My alternative idea is to write the model as if the parameters were not shared in brms and get the stan code with the mak_stancode() and make_standata() functions. To do this I specified my r code was:

mod_joint <-
  bf(
    Nmr |
      se(Nerr, sigma = TRUE) ~ (concm3 * (Q + (invlogit(
        k
      ) * V))) / (exp(logI0) * ft * (
        MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2)
      )),
    logI0 ~ 1,
    b ~ 1,
    k ~ 1 + (1 | site),
    nlf(sigma ~ int + concm3 ^ a),
    int ~ 1,
    a ~ 1,
    nl = T,
    family = gaussian()
  ) +
  bf(
    BI |
      se(BIerr, sigma = TRUE) ~ ((Q + invlogit(k) * V) * concm3 * MG) / (exp(logI0) *
                                                                                ft * (
                                                                                  MG ^ b + (mgVar / 2) * b * (b - 1) * MG ^ (b - 2)
                                                                                )),
    logI0 ~ 1,
    b ~ 1,
    k ~ 1 + (1 | site),
    nlf(sigma ~ int + concm3 ^ a),
    int ~ 1,
    a ~ 1,
    nl = T,
    family = gaussian()
  )
get_prior(mod_joint, data = data)

prior_stan_code <- c(
  prior(normal(10,2), nlpar = "logI0", lb = 0, resp = "BI"), 
  prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1, resp = "BI"), 
  prior(normal(-3,1.5), nlpar = "k", lb = 0, resp = "BI"),
  prior(exponential(3), class = "sd", nlpar = "k", resp = "BI"),
  prior(normal(0,1), class = "b", nlpar = "a", lb = 0, resp = "BI"),
  prior(normal(0,1), class = "b", nlpar = "int", lb = 0, resp = "BI"),
  prior(normal(10,2), nlpar = "logI0", lb = 0, resp = "Nmr"), 
  prior(beta(0.7*4,(1-0.7)*4), nlpar = "b", lb = 0, ub = 1, resp = "Nmr"), 
  prior(normal(-3,1.5), nlpar = "k", lb = 0, resp = "Nmr"),
  prior(exponential(3), class = "sd", nlpar = "k", resp = "Nmr"),
  prior(normal(0,1), class = "b", nlpar = "a", lb = 0, resp = "Nmr"),
  prior(normal(0,1), class = "b", nlpar = "int", lb = 0, resp = "Nmr")
)

make_stancode(mod_joint, data = data, prior = prior_stan_code)

## make stan data
stan_data <- make_standata(mod_joint, data = data, prior = prior_stan_code)

Here is an example of data:

structure(list(site = c("site1", "site1", "site1", "site1", "site2", 
"site2", "site2", "site2", "site3", "site3", "site3", "site3"
), Nmr = c(3173L, 2966L, 1262L, 1149L, 504L, 453L, 777L, 704L, 
1533L, 372L, 896L, 193L), Nerr = c(336.989795918367, 336.989795918367, 
167.602040816327, 167.602040816327, 87.7551020408163, 87.7551020408163, 
146.428571428571, 146.428571428571, 163.520408163265, 163.520408163265, 
139.030612244898, 131.377551020408), concm3 = c(5148.1, 3723.06869230388, 
1917.81995681897, 1694.86864942529, 2458.9, 473.748600012, 2370.455533366, 
1104.59457333333, 1543.6, 4244.82828278788, 1939.86065651591, 
632.783535353535), Q = c(44606.1973266835, 7148.80605247308, 
40475.0499899619, 14328.9779815932, 1179.41546274407, 311.338271604938, 
1011.65268657152, 343.723340046114, 3858.32694159881, 849.379914193153, 
3493.00224732218, 1107.93016373347), V = c(319346.983077122, 
319346.983077122, 319346.983077122, 319346.983077122, 56377.3469017162, 
142137.4817856, 142137.4817856, 142137.4817856, 156332.500004466, 
60228.1080636982, 60228.1080636982, 60228.1080636982), ft = c(0.964098761243345, 
0.915984845013819, 0.957674570493223, 0.900846934099921, 0.861315579722472, 
0.891627183842253, 0.750831703960004, 0.900846934099921, 0.48341486108144, 
0.624795727445052, 0.502013926753242, 0.620807014871961), MG = c(290.378151260503, 
289.866660225131, 386.116946778711, 385.555832813475, 108.302240896359, 
108.449711723254, 142.233659491194, 142.732474964235, 79.6770744225835, 
90.1723634238972, 96.9457831325302, 74.3433696348494), mgVar = c(7167.07851532402, 
7083.85137352115, 4429.97998155698, 4310.38614224204, 1366.25000784314, 
1255.20431299441, 563.231373668189, 586.130001516698, 404.951071351273, 
411.166985148681, 661.822873934763, 404.439865874843), BI = c(586326.283422458, 
547110.145417652, 310087.009803921, 281911.414847162, 34735.4823529412, 
31263.0941704036, 70328.0794520548, 63944.1487839771, 77728.6077844312, 
21346.2576687117, 55276.7228915663, 9130.71748878923), BIerr = c(62271.0288660917, 
62161.3406099672, 41181.6289015605, 41121.7828179307, 6048.04721888756, 
6056.28260272719, 13253.5909980431, 13300.0715307582, 8291.07219235, 
9383.19560953216, 8577.18373493977, 6215.39535096549)), class = "data.frame", row.names = c(NA, 
-12L))

The stan code produced is :

// generated with brms 2.21.0
functions {
}
data {
  int<lower=1> N;  // total number of observations
  int<lower=1> N_Nmr;  // number of observations
  vector[N_Nmr] Y_Nmr;  // response variable
  vector<lower=0>[N_Nmr] se_Nmr;  // known sampling error
  int<lower=1> K_Nmr_logI0;  // number of population-level effects
  matrix[N_Nmr, K_Nmr_logI0] X_Nmr_logI0;  // population-level design matrix
  int<lower=1> K_Nmr_b;  // number of population-level effects
  matrix[N_Nmr, K_Nmr_b] X_Nmr_b;  // population-level design matrix
  int<lower=1> K_Nmr_k;  // number of population-level effects
  matrix[N_Nmr, K_Nmr_k] X_Nmr_k;  // population-level design matrix
  int<lower=1> K_Nmr_int;  // number of population-level effects
  matrix[N_Nmr, K_Nmr_int] X_Nmr_int;  // population-level design matrix
  int<lower=1> K_Nmr_a;  // number of population-level effects
  matrix[N_Nmr, K_Nmr_a] X_Nmr_a;  // population-level design matrix
  // covariates for non-linear functions
  vector[N_Nmr] C_Nmr_1;
  vector[N_Nmr] C_Nmr_2;
  vector[N_Nmr] C_Nmr_3;
  vector[N_Nmr] C_Nmr_4;
  vector[N_Nmr] C_Nmr_5;
  vector[N_Nmr] C_Nmr_6;
  // covariates for non-linear functions
  vector[N_Nmr] C_sigma_Nmr_1;
  int<lower=1> N_BI;  // number of observations
  vector[N_BI] Y_BI;  // response variable
  vector<lower=0>[N_BI] se_BI;  // known sampling error
  int<lower=1> K_BI_logI0;  // number of population-level effects
  matrix[N_BI, K_BI_logI0] X_BI_logI0;  // population-level design matrix
  int<lower=1> K_BI_b;  // number of population-level effects
  matrix[N_BI, K_BI_b] X_BI_b;  // population-level design matrix
  int<lower=1> K_BI_k;  // number of population-level effects
  matrix[N_BI, K_BI_k] X_BI_k;  // population-level design matrix
  int<lower=1> K_BI_int;  // number of population-level effects
  matrix[N_BI, K_BI_int] X_BI_int;  // population-level design matrix
  int<lower=1> K_BI_a;  // number of population-level effects
  matrix[N_BI, K_BI_a] X_BI_a;  // population-level design matrix
  // covariates for non-linear functions
  vector[N_BI] C_BI_1;
  vector[N_BI] C_BI_2;
  vector[N_BI] C_BI_3;
  vector[N_BI] C_BI_4;
  vector[N_BI] C_BI_5;
  vector[N_BI] C_BI_6;
  // covariates for non-linear functions
  vector[N_BI] C_sigma_BI_1;
  int<lower=1> nresp;  // number of responses
  int nrescor;  // number of residual correlations
  // data for group-level effects of ID 1
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  array[N_Nmr] int<lower=1> J_1_Nmr;  // grouping indicator per observation
  // group-level predictor values
  vector[N_Nmr] Z_1_Nmr_k_1;
  // data for group-level effects of ID 2
  int<lower=1> N_2;  // number of grouping levels
  int<lower=1> M_2;  // number of coefficients per level
  array[N_BI] int<lower=1> J_2_BI;  // grouping indicator per observation
  // group-level predictor values
  vector[N_BI] Z_2_BI_k_1;
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  vector<lower=0>[N_Nmr] se2_Nmr = square(se_Nmr);
  vector<lower=0>[N_BI] se2_BI = square(se_BI);
  array[N] vector[nresp] Y;  // response array
  for (n in 1:N) {
    Y[n] = transpose([Y_Nmr[n], Y_BI[n]]);
  }
}
parameters {
  vector<lower=0>[K_Nmr_logI0] b_Nmr_logI0;  // regression coefficients
  vector<lower=0,upper=1>[K_Nmr_b] b_Nmr_b;  // regression coefficients
  vector<lower=0>[K_Nmr_k] b_Nmr_k;  // regression coefficients
  vector<lower=0>[K_Nmr_int] b_Nmr_int;  // regression coefficients
  vector<lower=0>[K_Nmr_a] b_Nmr_a;  // regression coefficients
  vector<lower=0>[K_BI_logI0] b_BI_logI0;  // regression coefficients
  vector<lower=0,upper=1>[K_BI_b] b_BI_b;  // regression coefficients
  vector<lower=0>[K_BI_k] b_BI_k;  // regression coefficients
  vector<lower=0>[K_BI_int] b_BI_int;  // regression coefficients
  vector<lower=0>[K_BI_a] b_BI_a;  // regression coefficients
  cholesky_factor_corr[nresp] Lrescor;  // parameters for multivariate linear models
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  array[M_1] vector[N_1] z_1;  // standardized group-level effects
  vector<lower=0>[M_2] sd_2;  // group-level standard deviations
  array[M_2] vector[N_2] z_2;  // standardized group-level effects
}
transformed parameters {
  vector[N_1] r_1_Nmr_k_1;  // actual group-level effects
  vector[N_2] r_2_BI_k_1;  // actual group-level effects
  real lprior = 0;  // prior contributions to the log posterior
  r_1_Nmr_k_1 = (sd_1[1] * (z_1[1]));
  r_2_BI_k_1 = (sd_2[1] * (z_2[1]));
  lprior += normal_lpdf(b_Nmr_logI0 | 10, 2)
    - 1 * normal_lccdf(0 | 10, 2);
  lprior += beta_lpdf(b_Nmr_b | 0.7 * 4,(1 - 0.7) * 4);
  lprior += normal_lpdf(b_Nmr_k | -3, 1.5)
    - 1 * normal_lccdf(0 | -3, 1.5);
  lprior += normal_lpdf(b_Nmr_int | 0, 1)
    - 1 * normal_lccdf(0 | 0, 1);
  lprior += normal_lpdf(b_Nmr_a | 0, 1)
    - 1 * normal_lccdf(0 | 0, 1);
  lprior += normal_lpdf(b_BI_logI0 | 10, 2)
    - 1 * normal_lccdf(0 | 10, 2);
  lprior += beta_lpdf(b_BI_b | 0.7 * 4,(1 - 0.7) * 4);
  lprior += normal_lpdf(b_BI_k | -3, 1.5)
    - 1 * normal_lccdf(0 | -3, 1.5);
  lprior += normal_lpdf(b_BI_int | 0, 1)
    - 1 * normal_lccdf(0 | 0, 1);
  lprior += normal_lpdf(b_BI_a | 0, 1)
    - 1 * normal_lccdf(0 | 0, 1);
  lprior += lkj_corr_cholesky_lpdf(Lrescor | 1);
  lprior += exponential_lpdf(sd_1 | 3);
  lprior += exponential_lpdf(sd_2 | 3);
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N_Nmr] nlp_Nmr_logI0 = rep_vector(0.0, N_Nmr);
    // initialize linear predictor term
    vector[N_Nmr] nlp_Nmr_b = rep_vector(0.0, N_Nmr);
    // initialize linear predictor term
    vector[N_Nmr] nlp_Nmr_k = rep_vector(0.0, N_Nmr);
    // initialize linear predictor term
    vector[N_Nmr] nlp_Nmr_int = rep_vector(0.0, N_Nmr);
    // initialize linear predictor term
    vector[N_Nmr] nlp_Nmr_a = rep_vector(0.0, N_Nmr);
    // initialize non-linear predictor term
    vector[N_Nmr] mu_Nmr;
    // initialize non-linear predictor term
    vector[N_Nmr] sigma_Nmr;
    // initialize linear predictor term
    vector[N_BI] nlp_BI_logI0 = rep_vector(0.0, N_BI);
    // initialize linear predictor term
    vector[N_BI] nlp_BI_b = rep_vector(0.0, N_BI);
    // initialize linear predictor term
    vector[N_BI] nlp_BI_k = rep_vector(0.0, N_BI);
    // initialize linear predictor term
    vector[N_BI] nlp_BI_int = rep_vector(0.0, N_BI);
    // initialize linear predictor term
    vector[N_BI] nlp_BI_a = rep_vector(0.0, N_BI);
    // initialize non-linear predictor term
    vector[N_BI] mu_BI;
    // initialize non-linear predictor term
    vector[N_BI] sigma_BI;
    // multivariate predictor array
    array[N] vector[nresp] Mu;
    array[N] vector[nresp] sigma;
    // cholesky factor of residual covariance matrix
    array[N] matrix[nresp, nresp] LSigma;
    nlp_Nmr_logI0 += X_Nmr_logI0 * b_Nmr_logI0;
    nlp_Nmr_b += X_Nmr_b * b_Nmr_b;
    nlp_Nmr_k += X_Nmr_k * b_Nmr_k;
    nlp_Nmr_int += X_Nmr_int * b_Nmr_int;
    nlp_Nmr_a += X_Nmr_a * b_Nmr_a;
    nlp_BI_logI0 += X_BI_logI0 * b_BI_logI0;
    nlp_BI_b += X_BI_b * b_BI_b;
    nlp_BI_k += X_BI_k * b_BI_k;
    nlp_BI_int += X_BI_int * b_BI_int;
    nlp_BI_a += X_BI_a * b_BI_a;
    for (n in 1:N_Nmr) {
      // add more terms to the linear predictor
      nlp_Nmr_k[n] += r_1_Nmr_k_1[J_1_Nmr[n]] * Z_1_Nmr_k_1[n];
    }
    for (n in 1:N_BI) {
      // add more terms to the linear predictor
      nlp_BI_k[n] += r_2_BI_k_1[J_2_BI[n]] * Z_2_BI_k_1[n];
    }
    for (n in 1:N_Nmr) {
      // compute non-linear predictor values
      mu_Nmr[n] = ((C_Nmr_1[n] * (C_Nmr_2[n] + (invlogit(nlp_Nmr_k[n]) * C_Nmr_3[n]))) / (exp(nlp_Nmr_logI0[n]) * C_Nmr_4[n] * (C_Nmr_5[n] ^ nlp_Nmr_b[n] + (C_Nmr_6[n] / 2) * nlp_Nmr_b[n] * (nlp_Nmr_b[n] - 1) * C_Nmr_5[n] ^ (nlp_Nmr_b[n] - 2))));
    }
    for (n in 1:N_Nmr) {
      // compute non-linear predictor values
      sigma_Nmr[n] = exp(nlp_Nmr_int[n] + C_sigma_Nmr_1[n] ^ nlp_Nmr_a[n]);
    }
    for (n in 1:N_BI) {
      // compute non-linear predictor values
      mu_BI[n] = (((C_BI_1[n] + invlogit(nlp_BI_k[n]) * C_BI_2[n]) * C_BI_3[n] * C_BI_4[n]) / (exp(nlp_BI_logI0[n]) * C_BI_5[n] * (C_BI_4[n] ^ nlp_BI_b[n] + (C_BI_6[n] / 2) * nlp_BI_b[n] * (nlp_BI_b[n] - 1) * C_BI_4[n] ^ (nlp_BI_b[n] - 2))));
    }
    for (n in 1:N_BI) {
      // compute non-linear predictor values
      sigma_BI[n] = exp(nlp_BI_int[n] + C_sigma_BI_1[n] ^ nlp_BI_a[n]);
    }
    // combine univariate parameters
    for (n in 1:N) {
      Mu[n] = transpose([mu_Nmr[n], mu_BI[n]]);
      sigma[n] = transpose([sqrt(square(sigma_Nmr[n]) + se2_Nmr[n]), sqrt(square(sigma_BI[n]) + se2_BI[n])]);
      LSigma[n] = diag_pre_multiply(sigma[n], Lrescor);
    }
    for (n in 1:N) {
      target += multi_normal_cholesky_lpdf(Y[n] | Mu[n], LSigma[n]);
    }
  }
  // priors including constants
  target += lprior;
  target += std_normal_lpdf(z_1[1]);
  target += std_normal_lpdf(z_2[1]);
}
generated quantities {
  // residual correlations
  corr_matrix[nresp] Rescor = multiply_lower_tri_self_transpose(Lrescor);
  vector<lower=-1,upper=1>[nrescor] rescor;
  // extract upper diagonal of correlation matrix
  for (k in 1:nresp) {
    for (j in 1:(k - 1)) {
      rescor[choose(k - 1, 2) + j] = Rescor[j, k];
    }
  }
}

How can I specify that the parameters I0, b, and k are shared in the stan model? I also want the k estimated for each site with the random effect to be the same between the two sub-models.
Thank you!

1 Like

You should be able to share explicitly-named variables in non-linear formulas. The example you shared is way too complex to quickly debug the problem - I think it would be better to gradually test more and more complex formulas and see where the problem arises. I would guess you need to use inv_logit and not invlogit for example.

The easiest way to edit the Stan code (say for k) would be to merge the nlp_BI_k and nlp_Nmr_k variables. This could be done e.g. by:

Removing:

vector[N_BI] nlp_BI_k = rep_vector(0.0, N_BI);
...
nlp_BI_k += X_BI_k * b_BI_k;
...
   for (n in 1:N_BI) {
      // add more terms to the linear predictor
      nlp_BI_k[n] += r_2_BI_k_1[J_2_BI[n]] * Z_2_BI_k_1[n];
    }

then replacing the remaining use of nlp_BI_k with nlp_Nmr_k.

For efficiency you’d also want to remove variables that go into computation of nlp_BI_k i.e. r_2_BI_k_1, b_BI_k, z_2 and sd_2 (and all of their references) - overall this is a bit annoying problem if you are not very friendly with STan.

Hope that helps at least a little

1 Like

I was able to edit it! Thank you!