I’m actually having troubles with a zero inflated Poisson model, but I thought that starting was a simpler question was a good idea. In this model I’m getting this type of error after i added code for out of sample prediction:
Chain 1: Exception: model4004405c442f_zeroinflated_namespace::write_array: y_pred[k0__] is -2147483648, but must be greater than or equal to 0 (in 'model4004405c442f_zeroinflated' at line 113)
This is my stan code
data{
int<lower=0> N;
int<lower=0> N_sch;
int<lower=0> N_cluster;
int<lower=1,upper=N_sch> idx_sch[N];
int<lower=1,upper=N_cluster> idx_cluster[N];
int<lower=0> K_logit;
matrix[N, K_logit] X_logit;
int<lower=0> K_poisson;
matrix[N, K_poisson] X_poisson;
int<lower=0> offset[N];
int<lower=0> y[N];
int<lower=0> pred_N;
int<lower=1,upper=N_sch> pred_idx_sch[pred_N];
int<lower=1,upper=N_cluster> pred_idx_cluster[pred_N];
matrix[pred_N, K_logit] pred_X_logit;
matrix[pred_N, K_poisson] pred_X_poisson;
int<lower=0> pred_offset[pred_N];
}
transformed data {
matrix[N, K_logit] Q_logit_ast ;
matrix[K_logit, K_logit] R_logit_ast ;
matrix[K_logit, K_logit] R_logit_ast_inverse ;
matrix[N, K_logit] X_logit_centered ;
matrix[N, K_poisson] Q_poisson_ast ;
matrix[K_poisson, K_poisson] R_poisson_ast ;
matrix[K_poisson, K_poisson] R_poisson_ast_inverse ;
matrix[N, K_poisson] X_poisson_centered ;
matrix[pred_N, K_poisson] pred_X_poisson_centered ;
matrix[pred_N, K_logit] pred_X_logit_centered ;
for (k in 1:K_logit) {
X_logit_centered[,k] = X_logit[,k] - mean(X_logit[,k]) ;
pred_X_logit_centered[,k] = pred_X_logit[,k] -
mean(X_logit[,k]) ; // NOTE THAT I'M CENTERING USING X_logit instead of pred
}
for (k in 1:K_poisson) {
X_poisson_centered[,k] = X_poisson[,k] -
mean(X_poisson[,k]) ;
pred_X_poisson_centered[,k] = pred_X_poisson[,k] -
mean(X_poisson[,k]) ; // NOTE THAT I'M CENTERING USING X_poisson instead of pred
}
// thin and scale the QR decomposition
Q_logit_ast = qr_Q(X_logit_centered)[, 1:K_logit] * sqrt(N - 1.0) ;
R_logit_ast = qr_R(X_logit_centered)[1:K_logit, ] / sqrt(N - 1.0) ;
R_logit_ast_inverse = inverse(R_logit_ast) ;
Q_poisson_ast = qr_Q(X_poisson_centered)[, 1:K_poisson] * sqrt(N - 1.0) ;
R_poisson_ast = qr_R(X_poisson_centered)[1:K_poisson, ] / sqrt(N - 1.0) ;
R_poisson_ast_inverse = inverse(R_poisson_ast) ;
}
parameters {
real alpha_logit_ast ; // intercept
vector[K_logit] theta_logit ; // coefficients on Q_ast
vector[N_sch] sch_logit_std; //
real<lower=0> sigma_sch_logit; //
vector[N_cluster] cluster_logit_std; //
real<lower=0> sigma_cluster_logit; //
real alpha_poisson_ast ; // intercept
vector[K_poisson] theta_poisson ; // coefficients on Q_ast
vector[N_sch] sch_poisson_std; //
real<lower=0> sigma_sch_poisson; //
vector[N_cluster] cluster_poisson_std; //
real<lower=0> sigma_cluster_poisson; //
}
transformed parameters {
vector[N_sch] sch_logit;
vector[N_cluster] cluster_logit;
vector[N_sch] sch_poisson;
vector[N_cluster] cluster_poisson;
vector[N] lp;
vector[N] lambda_log;
sch_logit = sigma_sch_logit * sch_logit_std ; // Matt trick
cluster_logit = sigma_cluster_logit * cluster_logit_std ; // Matt trick
sch_poisson = sigma_sch_poisson * sch_poisson_std ; // Matt trick
cluster_poisson = sigma_cluster_poisson * cluster_poisson_std ; // Matt trick
lp = alpha_logit_ast + Q_logit_ast * theta_logit + sch_logit[idx_sch] + cluster_logit[idx_cluster];
lambda_log = (alpha_poisson_ast + Q_poisson_ast * theta_poisson + sch_poisson[idx_sch] + cluster_poisson[idx_cluster])/50;
}
model{
theta_logit ~ normal(0,1) ;
alpha_logit_ast ~ normal(0,1) ;
sch_logit_std ~ normal(0,1) ;
sigma_sch_logit ~ normal(0,1) ;
cluster_logit_std ~ normal(0,1) ;
sigma_cluster_logit ~ normal(0,1) ;
theta_poisson ~ normal(0,50) ; // normal(0,1)
alpha_poisson_ast ~ normal(0,50) ; // normal(0,1)
sch_poisson_std ~ normal(0,1) ;
sigma_sch_poisson ~ normal(0,50) ;
cluster_poisson_std ~ normal(0,1) ;
sigma_cluster_poisson ~ normal(0,50) ;
for(n in 1:N){
if(y[n] == 0)
target += log_sum_exp(bernoulli_logit_lpmf(1 | lp[n]),
bernoulli_logit_lpmf(0 | lp[n])
+ poisson_log_lpmf(y[n] | lambda_log[n] + log(offset[n])));
else
target += bernoulli_logit_lpmf(0 | lp[n])
+ poisson_log_lpmf(y[n] | lambda_log[n] + log(offset[n]));
}
}
generated quantities {
vector[N] log_lik;
int<lower=0> y_sim[N] ;
int zero;
int<lower=0> y_pred[N] ;
int pred_zero;
vector[pred_N] pred_lambda_log;
vector[pred_N] pred_lp;
//beta = R_ast_inverse * theta *sd_y ;
//pred_theta_poisson = R_poisson_ast_inverse * theta_poisson ;
pred_lambda_log = (alpha_poisson_ast +
pred_X_poisson_centered * (R_poisson_ast_inverse * theta_poisson) +
sch_poisson[pred_idx_sch] +
cluster_poisson[pred_idx_cluster])/50;
pred_lp = alpha_logit_ast +
pred_X_logit_centered * (R_logit_ast_inverse * theta_logit) +
sch_logit[pred_idx_sch] +
cluster_logit[pred_idx_cluster];
for(n in 1:N){
zero=bernoulli_logit_rng(lp[n]);
if(zero==1)
y_sim[n] = 0 ;
else
if(lambda_log[n] + log(offset[n])<20.7944) /// THIS HELPS ME DEBUG, NO CLUE IF THERE IS A BETTER WAY.
y_sim[n]=poisson_log_rng(lambda_log[n] + log(offset[n]));
else
y_sim[n]=999999;
if(y[n] == 0)
log_lik[n] = log_sum_exp(bernoulli_logit_lpmf(1 | lp[n]),
bernoulli_logit_lpmf(0 | lp[n])
+ poisson_log_lpmf(y[n] | lambda_log[n] + log(offset[n]))) ;
else
log_lik[n] = bernoulli_logit_lpmf(0 | lp[n])
+ poisson_log_lpmf(y[n] | lambda_log[n] + log(offset[n]));
}
for(n in 1:pred_N){
pred_zero=bernoulli_logit_rng(pred_lp[n]);
if(pred_zero==1)
y_pred[n] = 0 ;
else
if(pred_lambda_log[n] + log(pred_offset[n])<20.7944)
y_pred[n]=poisson_log_rng(pred_lambda_log[n] +
log(pred_offset[n]));
else
y_pred[n]=999999;
}
}
I don’t understand how the code that I have can produce negative numbers. In case it helps this is the histogram for pred_offset
What am I missing?