I’m facing a problem getting the Bayesian information criteria (WAIC and LOO) for the bell regression model in Stan.
I present below the functions necessary to carry out the adjustment of the model already described.
Bell Distribution Functions:
functions
{
real lambertW(real x)
{
real y;
real w;
y= sqrt( 1 + exp(1) *x);
w=-1 + 2.036 * log( (1 + 1.14956131 * y)/(1 + 0.45495740*log(1+y)) );
w = (w/(1+w)) * ( 1 + log(x/w) );
w = (w/(1+w)) * ( 1 + log(x/w) );
w = (w/(1+w)) * ( 1 + log(x/w) );
return(w);
}
real bellnumber(int n)
{
if(n < 2){ return(1); }
else
{
int k;
vector[n] B;
vector[n] Bneu;
B[1] = 1;
for (i in 1:(n - 1))
{
k = i;
Bneu[1] = B[i];
for (j in 2:(i + 1))
{
Bneu[j] = B[j - 1] + Bneu[j - 1];
}
for(j in 1:n)
{
B[j] = Bneu[j];
}
}
return(Bneu[k + 1]);
}
}
real bell_lpmf(int x, real theta)
{
real Bx;
real lprob;
Bx = bellnumber(x);
lprob = x*log(theta) - exp(theta) + 1 + log(Bx) - lgamma(x+1);
return lprob;
}
real loglik_bell(int[] x, real[] theta)
{
real lprob[num_elements(x)];
for(i in 1:num_elements(x))
{
lprob[i] = x[i]*log(theta[i]) - exp(theta[i]);
}
return sum(lprob);
}
}
Stan program and accompanying data.
data
{
int<lower=1> n; // observacoes
int<lower=1> p; // nº de parametros
int y[n]; // variavel resposta
matrix[n, p] X; // Matriz do modelo
}
parameters
{
vector[p] beta;
}
transformed parameters
{
vector[n] eta = X*beta; // preditora linear
real mu[n];
real theta[n]; // Media
for (i in 1:n)
{
mu[i] = exp(eta[i]);
theta[i] = lambertW(mu[i]);
}
}
model
{
for(i in 1:p)
{
beta[i] ~ normal(0, 10); // priori
}
target += loglik_bell(y, theta); // Loverossimilhanca incompleta
}
generated quantities
{
vector[n] log_lik;
for(i in 1:n){
log_lik[i] = bell_lpmf(y[i] | theta[i]);
}
}
The data to be reproduced is in the bellreg package, described below:
bellreg::faults
head(faults)
I present below the codes in R for the adjustment of the model and the error in which I have been facing.
library(bellreg); library(rstan); library(loo)
head(faults)
Modbell = stats::model.matrix(~ lroll, data = faults)
standata = list(y = faults$lroll, X = Modbell ,
n = nrow(Modbell ), p = ncol(Modbell) )
modestbell = stan(file = "Bell.stan", data = standata, chains = 4,
iter = 20000, warmup = 10000, thin = 1, verbose = F)
print(modestbell, pars = c("beta"), probs = c(0.025,0.975),
digits_summary = 4)
loo(modestbell)
waic(extract_log_lik(modestbell))
> loo(modestbell)
Error in validate_ll(log_ratios) : All input values must be finite.
> waic(extract_log_lik(modestbell))
Error in validate_ll(x) : All input values must be finite.
How could this problem be solved?
I describe here the link of the article where I have been based to implement the functions: