I am trying to fit a multilevel hurdle negative binomial model. I used gamma(0.01, 0.01) as the prior on the dispersion parameter, but I want the dispersion to depend on the grouping variable, so I gave each group its own dispersion parameter, all with the same prior. Is it possible to make this modification in brms? I couldn't find a way to do it, so I generated the Stan code with make_stancode() (roughly as sketched below) and edited the likelihood so that each observation uses the shape parameter of its own group. The problem is that the model is now very slow: at the current pace, a complete fit of 2000 iterations would take about 50 days. I know it is a large model with many parameters and the data set is quite big, but brms feels faster than this. Is there a way to express this model in brms, or anything else I can do to make it faster?
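For context, here is a minimal sketch of the kind of brms call I started from; the variables y and x1, the grouping factor g, and the data frame d are placeholders for my actual model, which has 23 group-level terms in both the count and the hurdle part:

library(brms)

# placeholder formula: the real model has 23 group-level terms
# in both the count (mu) and the hurdle (hu) part
f <- bf(
  y ~ x1 + (x1 | g),
  hu ~ x1 + (x1 | g)
)

stan_code <- make_stancode(
  f,
  data = d,
  family = hurdle_negbinomial(),
  prior = set_prior("gamma(0.01, 0.01)", class = "shape")
)

The generated model, with my modification to the shape parameter, is below.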
functions {
/* compute correlated group-level effects
* Args:
* z: matrix of unscaled group-level effects
* SD: vector of standard deviation parameters
* L: cholesky factor correlation matrix
* Returns:
* matrix of scaled group-level effects
*/
matrix scale_r_cor(matrix z, vector SD, matrix L) {
// r is stored in another dimension order than z
return transpose(diag_pre_multiply(SD, L) * z);
}
/* hurdle negative binomial log-PDF of a single response
* Args:
* y: the response value
* mu: mean parameter of negative binomial distribution
* phi: shape parameter of negative binomial distribution
* hu: hurdle probability
* Returns:
* a scalar to be added to the log posterior
*/
real hurdle_neg_binomial_lpmf(int y, real mu, real phi, real hu) {
if (y == 0) {
return bernoulli_lpmf(1 | hu);
} else {
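// condition the negative binomial on y > 0 by subtracting
// log P(Y > 0); under NB2, P(Y = 0) = (phi / (mu + phi))^phi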
return bernoulli_lpmf(0 | hu) +
neg_binomial_2_lpmf(y | mu, phi) -
log1m((phi / (mu + phi))^phi);
}
}
/* hurdle negative binomial log-PDF of a single response
* logit parameterization for the hurdle part
* Args:
* y: the response value
* mu: mean parameter of negative binomial distribution
* phi: phi parameter of negative binomial distribution
* hu: linear predictor of hurdle part
* Returns:
* a scalar to be added to the log posterior
*/
real hurdle_neg_binomial_logit_lpmf(int y, real mu, real phi, real hu) {
if (y == 0) {
return bernoulli_logit_lpmf(1 | hu);
} else {
return bernoulli_logit_lpmf(0 | hu) +
neg_binomial_2_lpmf(y | mu, phi) -
log1m((phi / (mu + phi))^phi);
}
}
/* hurdle negative binomial log-PDF of a single response
* log parameterization for the negative binomial part
* Args:
* y: the response value
* eta: linear predictor for negative binomial distribution
* phi: phi parameter of negative binomial distribution
* hu: hurdle probability
* Returns:
* a scalar to be added to the log posterior
*/
real hurdle_neg_binomial_log_lpmf(int y, real eta, real phi, real hu) {
if (y == 0) {
return bernoulli_lpmf(1 | hu);
} else {
return bernoulli_lpmf(0 | hu) +
neg_binomial_2_log_lpmf(y | eta, phi) -
log1m((phi / (exp(eta) + phi))^phi);
}
}
/* hurdle negative binomial log-PDF of a single response
* log parameterization for the negative binomial part
* logit parameterization for the hurdle part
* Args:
* y: the response value
* eta: linear predictor for negative binomial distribution
* phi: phi parameter of negative binomial distribution
* hu: linear predictor of hurdle part
* Returns:
* a scalar to be added to the log posterior
*/
real hurdle_neg_binomial_log_logit_lpmf(int y, real eta, real phi, real hu) {
if (y == 0) {
return bernoulli_logit_lpmf(1 | hu);
} else {
return bernoulli_logit_lpmf(0 | hu) +
neg_binomial_2_log_lpmf(y | eta, phi) -
log1m((phi / (exp(eta) + phi))^phi);
}
}
// hurdle negative binomial log-CCDF and log-CDF functions
real hurdle_neg_binomial_lccdf(int y, real mu, real phi, real hu) {
return bernoulli_lpmf(0 | hu) + neg_binomial_2_lccdf(y | mu, phi) -
log1m((phi / (mu + phi))^phi);
}
real hurdle_neg_binomial_lcdf(int y, real mu, real phi, real hu) {
return log1m_exp(hurdle_neg_binomial_lccdf(y | mu, phi, hu));
}
}
data {
int<lower=1> N; // total number of observations
int Y[N]; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> K_hu; // number of population-level effects
matrix[N, K_hu] X_hu; // population-level design matrix
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
int<lower=1> J_1[N]; // grouping indicator per observation
// group-level predictor values
vector[N] Z_1_1;
vector[N] Z_1_2;
vector[N] Z_1_3;
vector[N] Z_1_4;
vector[N] Z_1_5;
vector[N] Z_1_6;
vector[N] Z_1_7;
vector[N] Z_1_8;
vector[N] Z_1_9;
vector[N] Z_1_10;
vector[N] Z_1_11;
vector[N] Z_1_12;
vector[N] Z_1_13;
vector[N] Z_1_14;
vector[N] Z_1_15;
vector[N] Z_1_16;
vector[N] Z_1_17;
vector[N] Z_1_18;
vector[N] Z_1_19;
vector[N] Z_1_20;
vector[N] Z_1_21;
vector[N] Z_1_22;
vector[N] Z_1_23;
int<lower=1> NC_1; // number of group-level correlations
// data for group-level effects of ID 2
int<lower=1> N_2; // number of grouping levels
int<lower=1> M_2; // number of coefficients per level
int<lower=1> J_2[N]; // grouping indicator per observation
// group-level predictor values
vector[N] Z_2_hu_1;
vector[N] Z_2_hu_2;
vector[N] Z_2_hu_3;
vector[N] Z_2_hu_4;
vector[N] Z_2_hu_5;
vector[N] Z_2_hu_6;
vector[N] Z_2_hu_7;
vector[N] Z_2_hu_8;
vector[N] Z_2_hu_9;
vector[N] Z_2_hu_10;
vector[N] Z_2_hu_11;
vector[N] Z_2_hu_12;
vector[N] Z_2_hu_13;
vector[N] Z_2_hu_14;
vector[N] Z_2_hu_15;
vector[N] Z_2_hu_16;
vector[N] Z_2_hu_17;
vector[N] Z_2_hu_18;
vector[N] Z_2_hu_19;
vector[N] Z_2_hu_20;
vector[N] Z_2_hu_21;
vector[N] Z_2_hu_22;
vector[N] Z_2_hu_23;
int<lower=1> NC_2; // number of group-level correlations
int prior_only; // should the likelihood be ignored?
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
int Kc_hu = K_hu - 1;
matrix[N, Kc_hu] Xc_hu; // centered version of X_hu without an intercept
vector[Kc_hu] means_X_hu; // column means of X_hu before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
for (i in 2:K_hu) {
means_X_hu[i - 1] = mean(X_hu[, i]);
Xc_hu[, i - 1] = X_hu[, i] - means_X_hu[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
real Intercept; // temporary intercept for centered predictors
vector<lower=0>[N_1] shape; // group-specific shape (dispersion) parameters
vector[Kc_hu] b_hu; // population-level effects
real Intercept_hu; // temporary intercept for centered predictors
vector<lower=0>[M_1] sd_1; // group-level standard deviations
matrix[M_1, N_1] z_1; // standardized group-level effects
cholesky_factor_corr[M_1] L_1; // cholesky factor of correlation matrix
vector<lower=0>[M_2] sd_2; // group-level standard deviations
matrix[M_2, N_2] z_2; // standardized group-level effects
cholesky_factor_corr[M_2] L_2; // cholesky factor of correlation matrix
}
transformed parameters {
matrix[N_1, M_1] r_1; // actual group-level effects
// using vectors speeds up indexing in loops
vector[N_1] r_1_1;
vector[N_1] r_1_2;
vector[N_1] r_1_3;
vector[N_1] r_1_4;
vector[N_1] r_1_5;
vector[N_1] r_1_6;
vector[N_1] r_1_7;
vector[N_1] r_1_8;
vector[N_1] r_1_9;
vector[N_1] r_1_10;
vector[N_1] r_1_11;
vector[N_1] r_1_12;
vector[N_1] r_1_13;
vector[N_1] r_1_14;
vector[N_1] r_1_15;
vector[N_1] r_1_16;
vector[N_1] r_1_17;
vector[N_1] r_1_18;
vector[N_1] r_1_19;
vector[N_1] r_1_20;
vector[N_1] r_1_21;
vector[N_1] r_1_22;
vector[N_1] r_1_23;
matrix[N_2, M_2] r_2; // actual group-level effects
// using vectors speeds up indexing in loops
vector[N_2] r_2_hu_1;
vector[N_2] r_2_hu_2;
vector[N_2] r_2_hu_3;
vector[N_2] r_2_hu_4;
vector[N_2] r_2_hu_5;
vector[N_2] r_2_hu_6;
vector[N_2] r_2_hu_7;
vector[N_2] r_2_hu_8;
vector[N_2] r_2_hu_9;
vector[N_2] r_2_hu_10;
vector[N_2] r_2_hu_11;
vector[N_2] r_2_hu_12;
vector[N_2] r_2_hu_13;
vector[N_2] r_2_hu_14;
vector[N_2] r_2_hu_15;
vector[N_2] r_2_hu_16;
vector[N_2] r_2_hu_17;
vector[N_2] r_2_hu_18;
vector[N_2] r_2_hu_19;
vector[N_2] r_2_hu_20;
vector[N_2] r_2_hu_21;
vector[N_2] r_2_hu_22;
vector[N_2] r_2_hu_23;
// compute actual group-level effects
r_1 = scale_r_cor(z_1, sd_1, L_1);
r_1_1 = r_1[, 1];
r_1_2 = r_1[, 2];
r_1_3 = r_1[, 3];
r_1_4 = r_1[, 4];
r_1_5 = r_1[, 5];
r_1_6 = r_1[, 6];
r_1_7 = r_1[, 7];
r_1_8 = r_1[, 8];
r_1_9 = r_1[, 9];
r_1_10 = r_1[, 10];
r_1_11 = r_1[, 11];
r_1_12 = r_1[, 12];
r_1_13 = r_1[, 13];
r_1_14 = r_1[, 14];
r_1_15 = r_1[, 15];
r_1_16 = r_1[, 16];
r_1_17 = r_1[, 17];
r_1_18 = r_1[, 18];
r_1_19 = r_1[, 19];
r_1_20 = r_1[, 20];
r_1_21 = r_1[, 21];
r_1_22 = r_1[, 22];
r_1_23 = r_1[, 23];
// compute actual group-level effects
r_2 = scale_r_cor(z_2, sd_2, L_2);
r_2_hu_1 = r_2[, 1];
r_2_hu_2 = r_2[, 2];
r_2_hu_3 = r_2[, 3];
r_2_hu_4 = r_2[, 4];
r_2_hu_5 = r_2[, 5];
r_2_hu_6 = r_2[, 6];
r_2_hu_7 = r_2[, 7];
r_2_hu_8 = r_2[, 8];
r_2_hu_9 = r_2[, 9];
r_2_hu_10 = r_2[, 10];
r_2_hu_11 = r_2[, 11];
r_2_hu_12 = r_2[, 12];
r_2_hu_13 = r_2[, 13];
r_2_hu_14 = r_2[, 14];
r_2_hu_15 = r_2[, 15];
r_2_hu_16 = r_2[, 16];
r_2_hu_17 = r_2[, 17];
r_2_hu_18 = r_2[, 18];
r_2_hu_19 = r_2[, 19];
r_2_hu_20 = r_2[, 20];
r_2_hu_21 = r_2[, 21];
r_2_hu_22 = r_2[, 22];
r_2_hu_23 = r_2[, 23];
// initialize linear predictor term
vector[N] mu = Intercept + Xc * b;
// initialize linear predictor term
vector[N] hu = Intercept_hu + Xc_hu * b_hu;
for (n in 1:N) {
// add more terms to the linear predictor
mu[n] += r_1_1[J_1[n]] * Z_1_1[n] + r_1_2[J_1[n]] * Z_1_2[n] + r_1_3[J_1[n]] * Z_1_3[n] + r_1_4[J_1[n]] * Z_1_4[n] + r_1_5[J_1[n]] * Z_1_5[n] + r_1_6[J_1[n]] * Z_1_6[n] + r_1_7[J_1[n]] * Z_1_7[n] + r_1_8[J_1[n]] * Z_1_8[n] + r_1_9[J_1[n]] * Z_1_9[n] + r_1_10[J_1[n]] * Z_1_10[n] + r_1_11[J_1[n]] * Z_1_11[n] + r_1_12[J_1[n]] * Z_1_12[n] + r_1_13[J_1[n]] * Z_1_13[n] + r_1_14[J_1[n]] * Z_1_14[n] + r_1_15[J_1[n]] * Z_1_15[n] + r_1_16[J_1[n]] * Z_1_16[n] + r_1_17[J_1[n]] * Z_1_17[n] + r_1_18[J_1[n]] * Z_1_18[n] + r_1_19[J_1[n]] * Z_1_19[n] + r_1_20[J_1[n]] * Z_1_20[n] + r_1_21[J_1[n]] * Z_1_21[n] + r_1_22[J_1[n]] * Z_1_22[n] + r_1_23[J_1[n]] * Z_1_23[n];
}
for (n in 1:N) {
// add more terms to the linear predictor
hu[n] += r_2_hu_1[J_2[n]] * Z_2_hu_1[n] + r_2_hu_2[J_2[n]] * Z_2_hu_2[n] + r_2_hu_3[J_2[n]] * Z_2_hu_3[n] + r_2_hu_4[J_2[n]] * Z_2_hu_4[n] + r_2_hu_5[J_2[n]] * Z_2_hu_5[n] + r_2_hu_6[J_2[n]] * Z_2_hu_6[n] + r_2_hu_7[J_2[n]] * Z_2_hu_7[n] + r_2_hu_8[J_2[n]] * Z_2_hu_8[n] + r_2_hu_9[J_2[n]] * Z_2_hu_9[n] + r_2_hu_10[J_2[n]] * Z_2_hu_10[n] + r_2_hu_11[J_2[n]] * Z_2_hu_11[n] + r_2_hu_12[J_2[n]] * Z_2_hu_12[n] + r_2_hu_13[J_2[n]] * Z_2_hu_13[n] + r_2_hu_14[J_2[n]] * Z_2_hu_14[n] + r_2_hu_15[J_2[n]] * Z_2_hu_15[n] + r_2_hu_16[J_2[n]] * Z_2_hu_16[n] + r_2_hu_17[J_2[n]] * Z_2_hu_17[n] + r_2_hu_18[J_2[n]] * Z_2_hu_18[n] + r_2_hu_19[J_2[n]] * Z_2_hu_19[n] + r_2_hu_20[J_2[n]] * Z_2_hu_20[n] + r_2_hu_21[J_2[n]] * Z_2_hu_21[n] + r_2_hu_22[J_2[n]] * Z_2_hu_22[n] + r_2_hu_23[J_2[n]] * Z_2_hu_23[n];
}
}
model {
// likelihood not including constants
if (!prior_only) {
for (n in 1:N) {
// each observation uses the shape parameter of its own group
target += hurdle_neg_binomial_log_logit_lpmf(Y[n] | mu[n], shape[J_1[n]], hu[n]);
}
}
// priors not including constants
target += student_t_lupdf(b | 3, 0, 2.5);
target += student_t_lupdf(Intercept | 3, 0, 10);
target += gamma_lupdf(shape | 0.01, 0.01);
target += student_t_lupdf(b_hu | 3, 0, 2.5);
target += student_t_lupdf(Intercept_hu | 3, 0, 10);
target += cauchy_lupdf(sd_1 | 0,2);
target += std_normal_lupdf(to_vector(z_1));
target += lkj_corr_cholesky_lupdf(L_1 | 1);
target += cauchy_lupdf(sd_2 | 0,2);
target += std_normal_lupdf(to_vector(z_2));
target += lkj_corr_cholesky_lupdf(L_2 | 1);
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// actual population-level intercept
real b_hu_Intercept = Intercept_hu - dot_product(means_X_hu, b_hu);
// compute group-level correlations
corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
vector<lower=-1,upper=1>[NC_1] cor_1;
// compute group-level correlations
corr_matrix[M_2] Cor_2 = multiply_lower_tri_self_transpose(L_2);
vector<lower=-1,upper=1>[NC_2] cor_2;
// extract upper diagonal of correlation matrix
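// choose(k - 1, 2) + j maps the upper-triangular entry (j, k)
// to its position in the flattened correlation vector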
for (k in 1:M_1) {
for (j in 1:(k - 1)) {
cor_1[choose(k - 1, 2) + j] = Cor_1[j, k];
}
}
// extract upper diagonal of correlation matrix
for (k in 1:M_2) {
for (j in 1:(k - 1)) {
cor_2[choose(k - 1, 2) + j] = Cor_2[j, k];
}
}
}
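For completeness, a minimal sketch of how I then fit the edited code, assuming it is saved as hurdle_nb_groupshape.stan (a placeholder file name); the data list comes from make_standata(), so it matches the variable names in the generated data block:

library(brms)
library(rstan)

# data list matching the data block of the generated Stan code
standata <- make_standata(f, data = d, family = hurdle_negbinomial())

# fit the hand-edited model
fit <- stan(
  file = "hurdle_nb_groupshape.stan",
  data = standata,
  chains = 4,
  cores = 4,
  iter = 2000  # 1000 warmup + 1000 sampling per chain
)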