My model works well with simulated data that stays within healthy ranges. However, I obtained some real data, which is action count data, for instance the number of times someone interacts with a question on the computer. Some of the questions in this data are severely overdispersed: some people write multiple paragraphs (500-800 actions) while others only take a few (1-5), which drives my dispersion parameter nu extremely low.
I can use a negative binomial model, and it works fine, but I am trying to use a Conway-Maxwell-Poisson model, which could also handle underdispersion at the same time. However, its normalizing constant struggles when nu is very low (overdispersion). Is there something I can do to transform nu so that estimation runs smoothly, while still allowing model comparison with simpler models, such as one based on the plain Poisson distribution? For example, if I took exp(nu), that would keep nu in a healthy range (above 0.3). Whenever I apply a transformation, estimation runs smoothly, but the log_lik indicates a problem when I do model comparison: LOO heavily prefers the “wrong” model, so I suspect I am doing something wrong.
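To make the transformation concrete, this is the kind of reparameterization I have in mind (a minimal sketch; the name `nu_raw` and the 0.3 floor are just illustrative):

```stan
parameters {
  vector[I] nu_raw;  // unconstrained
}
transformed parameters {
  // keeps nu above 0.3, where the normalizing constant behaves;
  // declaring vector<lower=0.3>[I] nu; directly in parameters would
  // apply an equivalent transform (and its Jacobian) automatically
  vector[I] nu = 0.3 + exp(nu_raw);
}
model {
  nu_raw ~ normal(mu_nu, sigma_nu);  // prior now sits on the raw scale
}
```

My understanding is that as long as generated quantities computes log_lik with this same transformed nu, the LOO comparison against the Poisson model (the nu = 1 special case of the CMP) should still be on a common scale, which is why the results confuse me. The full model: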
```stan
functions {
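  // log of the COM-Poisson normalizing constant Z(lambda, nu), via its
  // asymptotic expansion in powers of 1 / (nu * lambda^(1/nu));
  // accurate when lambda^(1/nu) is reasonably large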
  real approximation(real log_lambda, real nu) {
    real log_mu = log_lambda / nu;
    real nu_mu = nu * exp(log_mu);
    real nu2 = nu^2;
    // 1 plus the first three correction terms of the residual series
    real log_sum_resid = log1p(
      nu_mu^(-1) * (nu2 - 1) / 24
      + nu_mu^(-2) * (nu2 - 1) / 1152 * (nu2 + 23)
      + nu_mu^(-3) * (nu2 - 1) / 414720 * (5 * nu2^2 - 298 * nu2 + 11237)
    );
    return nu_mu + log_sum_resid
           - ((log(2 * pi()) + log_mu) * (nu - 1) / 2 + log(nu) / 2);
  }
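  // brute-force log Z(lambda, nu): sum the series term by term on the
  // log scale until successive partial sums stabilize (at most 200 terms)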
  real summation(real log_lambda, real nu) {
    real z = negative_infinity();
    real z_last = 0;
    for (j in 0:200) {
      z_last = z;
      z = log_sum_exp(z, j * log_lambda - nu * lgamma(j + 1));
      if (abs(z - z_last) < 1e-3) {
        break;
      }
    }
    return z;
  }
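  // reduce_sum body; slicing runs over seq_N, so y_slice, ii_slice, etc.
  // are the full data arrays passed as shared arguments and indexed by
  // the global observation index n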
  real partial_sum(array[] int slice_n, int start, int end,
                   array[] int y_slice, array[] int jj_slice,
                   array[] int ii_slice, vector theta, vector beta,
                   vector nu, vector alpha, array[] int kk_slice,
                   vector gamma) {
    real partial_target = 0.0;
    for (n in start:end) {
      real log_lambda = nu[ii_slice[n]]
                        * (alpha[ii_slice[n]] * theta[jj_slice[n]]
                           + beta[ii_slice[n]] + gamma[kk_slice[n]]);
      real log_prob;
      // use the asymptotic approximation where it is accurate;
      // otherwise fall back to direct summation
      if (log_lambda / nu[ii_slice[n]] > log(1.5) && log_lambda > log(1.5)) {
        log_prob = y_slice[n] * log_lambda
                   - nu[ii_slice[n]] * lgamma(y_slice[n] + 1)
                   - approximation(log_lambda, nu[ii_slice[n]]);
      } else {
        log_prob = y_slice[n] * log_lambda
                   - nu[ii_slice[n]] * lgamma(y_slice[n] + 1)
                   - summation(log_lambda, nu[ii_slice[n]]);
      }
      partial_target += log_prob;
    }
    return partial_target;
  }
}
data {
  int<lower=0> I;                      // number of items
  int<lower=0> J;                      // number of persons
  int<lower=1> N;                      // number of observations
  int<lower=1> K;                      // number of item types
  array[N] int<lower=1, upper=I> ii;   // item for observation n
  array[N] int<lower=1, upper=J> jj;   // person for observation n
  array[N] int<lower=1, upper=K> kk;   // item type for observation n
  array[I] int<lower=1, upper=K> item_type_for_beta;
  array[N] int<lower=0> y;             // action counts
  array[N] int seq_N;                  // 1:N, sliced by reduce_sum
  int<lower=0> grainsize;
}
parameters {
  vector[J] theta;
  vector[I] beta;
  vector[K] gamma;
  vector<lower=0>[I] nu;
  vector<lower=0>[K] sigma_beta_k;
  vector<lower=0>[I] alpha;
  real<lower=0> sigma_alpha;
  real<lower=0> mu_alpha;
  real<lower=0> mu_gamma;
  real<lower=0> sigma_gamma;
  real<lower=0> mu_nu;
  real<lower=0> sigma_nu;
}
model {
  // note: nu and alpha are declared with <lower=0>, so these normal
  // priors act as truncated normals; the omitted truncation
  // normalization depends on the estimated hyperparameters
  mu_nu ~ normal(0, 10);
  sigma_nu ~ normal(0, 5);
  nu ~ normal(mu_nu, sigma_nu);
  sigma_beta_k ~ normal(0, 2);
  for (i in 1:I) {
    beta[i] ~ normal(gamma[item_type_for_beta[i]],
                     sigma_beta_k[item_type_for_beta[i]]);
  }
  theta ~ normal(0, 0.3);
  mu_alpha ~ normal(0, 5);
  sigma_alpha ~ normal(0, 2);
  alpha ~ normal(mu_alpha, sigma_alpha);
  mu_gamma ~ normal(0, 10);
  sigma_gamma ~ normal(0, 5);
  gamma ~ normal(mu_gamma, sigma_gamma);
  target += reduce_sum(partial_sum, seq_N, grainsize, y, jj, ii, theta,
                       beta, nu, alpha, kk, gamma);
}
generated quantities {
  array[N] real log_lik;
  for (n in 1:N) {
    // must replicate the model-block likelihood exactly (same nu scale)
    // for LOO-based model comparison to be valid
    real log_lambda = nu[ii[n]]
                      * (alpha[ii[n]] * theta[jj[n]] + beta[ii[n]] + gamma[kk[n]]);
    if (log_lambda / nu[ii[n]] > log(1.5) && log_lambda > log(1.5)) {
      log_lik[n] = y[n] * log_lambda - nu[ii[n]] * lgamma(y[n] + 1)
                   - approximation(log_lambda, nu[ii[n]]);
    } else {
      log_lik[n] = y[n] * log_lambda - nu[ii[n]] * lgamma(y[n] + 1)
                   - summation(log_lambda, nu[ii[n]]);
    }
  }
}
```
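For the comparison, the simpler Poisson model computes log_lik on the same y. A sketch of its generated quantities (assuming the same linear predictor with the nu scaling dropped):

```stan
generated quantities {
  array[N] real log_lik;
  for (n in 1:N) {
    // same predictor as the CMP model, without nu
    real log_lambda = alpha[ii[n]] * theta[jj[n]] + beta[ii[n]] + gamma[kk[n]];
    log_lik[n] = poisson_log_lpmf(y[n] | log_lambda);
  }
}
```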