Hello,
I am running the following model:
// generated with brms 2.17.0
functions {
/* hurdle lognormal log-PDF of a single response
* logit parameterization of the hurdle part
* Args:
* y: the response value
* mu: mean parameter of the lognormal distribution
* sigma: sd parameter of the lognormal distribution
* hu: linear predictor for the hurdle part
* Returns:
* a scalar to be added to the log posterior
*/
real hurdle_lognormal_logit_lpdf(real y_true, real y_obs, real mu, real y_err, real hu) {
if (y_true == 0) {
return bernoulli_logit_lpmf(1 | hu);
} else {
return bernoulli_logit_lpmf(0 | hu) +
lognormal_lpdf(y_obs | mu, y_err);
}
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y_obs; // response variable (MBH)
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> K_hu; // number of population-level effects
matrix[N, K_hu] X_hu; // population-level design matrix
int prior_only; // should the likelihood be ignored?
vector[N] Y_err;
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
int Kc_hu = K_hu - 1;
matrix[N, Kc_hu] Xc_hu; // centered version of X_hu without an intercept
vector[Kc_hu] means_X_hu; // column means of X_hu before centering
vector[N] Y_true;
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
for (i in 2:K_hu) {
means_X_hu[i - 1] = mean(X_hu[, i]);
Xc_hu[, i - 1] = X_hu[, i] - means_X_hu[i - 1];
}
for (n in 1:N) {
Y_true[n] = normal_rng(Y_obs[n], Y_err[n]);
if (Y_true[n] < 0) {
Y_true[n] = 0;
}
}
}
parameters {
vector[Kc] b; // population-level effects
real Intercept; // temporary intercept for centered predictors
real<lower=0> sigma; // dispersion parameter
vector[Kc_hu] b_hu; // population-level effects
real Intercept_hu; // temporary intercept for centered predictors
}
transformed parameters {
real lprior = 0; // prior contributions to the log posterior
lprior += student_t_lpdf(Intercept | 3, 2, 2.5);
lprior += student_t_lpdf(sigma | 3, 0, 2.5)
- 1 * student_t_lccdf(0 | 3, 0, 2.5);
lprior += logistic_lpdf(Intercept_hu | 0, 1);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = Intercept + Xc * b;
// initialize linear predictor term
vector[N] hu = Intercept_hu + Xc_hu * b_hu;
for (n in 1:N) {
target += hurdle_lognormal_logit_lpdf(Y_true[n] | Y_obs[n], mu[n], Y_err[n], hu[n]); // modify sigma for Y_err[n]
}
}
// priors including constants
target += lprior;
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// actual population-level intercept
real b_hu_Intercept = Intercept_hu - dot_product(means_X_hu, b_hu);
}
And for some reason, I keep obtaining this error:
Chain 1: Rejecting initial value:
Chain 1: Log probability evaluates to log(0), i.e. negative infinity.
Chain 1: Stan can’t start sampling from this initial value.
Chain 1:
Chain 1: Initialization between (-2, 2) failed after 100 attempts.
Chain 1: Try specifying initial values, reducing ranges of constrained values, or reparameterizing the model.
[1] “Error in sampler$call_sampler(args_list[[i]]) : Initialization failed.”
[1] “error occurred during calling the sampler; sampling not done”
It has only started happening since I introduced the Y_true vector in the transformed data block. I don’t understand what is causing this as my parameters to be sampled haven’t changed?