Kfold invalid stanfit bug

Hi all, I’m running a range of stan_gamm4 count models and then using kfold for model selection. While all of the models appear to fit fine (i.e. they converge and have no divergent transitions), some of them return the following error when I attempt to use kfold.


Fitting K = 5 models distributed over 12 cores
[1] "Error in sampler$call_sampler(args_list[[i]]) : "
[2] " Exception: gamma_rng: Inverse scale parameter is inf, but must be finite! (in 'model_count' at line 203)"
error occurred during calling the sampler; sampling not done
Error in check_stanfit(stanfit) :
Invalid stanfit object produced please report bug

An example of the rstanarm call for a model where this occurs is shown below (i.e. a model that has random intercept and slope terms).

# Set parallel compute
options(mc.cores = parallel::detectCores())

# temp solution for dealing with mclapply issues in rstudio
parallel:::setDefaultClusterOptions(setup_strategy = "sequential")

mod3 = rstanarm::stan_gamm4(formula =
                              n_pest_intercepts ~ s(log_trade_value_m),
                            random = ~
                              (1 + log_trade_value_m|reporter_code) +
                              (1 + log_trade_value_m|commodity_code) +
                              (1 + log_trade_value_m|year),
                            data = interception_model_data,
                            family = neg_binomial_2)
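
The kfold call itself is minimal (a sketch, since the exact call isn't shown in the snippet above; K = 5 and cores = 12 match the "Fitting K = 5 models distributed over 12 cores" message):

# 5-fold cross-validation of the model above, run in parallel
kf3 = rstanarm::kfold(mod3, K = 5, cores = 12)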

I’m currently using the development version of rstanarm because of a previous bug in how kfold handled random effects (https://github.com/stan-dev/rstanarm/issues/435).

> packageVersion("rstanarm")
[1] '2.19.3'
> packageVersion("loo")
[1] '2.2.0'
> packageVersion("rstan")
[1] '2.19.3'
> packageVersion("StanHeaders")
[1] '2.21.0.3'

I’m using macOS 10.15.4 and R 4.0.0.

If it helps, I’ve also included below the underlying Stan code generated by the above model call:

rstan::get_stanmodel(mod3$stanfit)
S4 class stanmodel 'count' coded as follows:
#include /pre/Columbia_copyright.stan
#include /pre/license.stan

// GLM for a count outcome
functions {
#include /functions/common_functions.stan
#include /functions/count_likelihoods.stan
}
data {
  // declares N, K, X, xbar, dense_X, nnz_x, w_x, v_x, u_x
#include /data/NKX.stan
  int<lower=0> y[N];  // count outcome
  // declares prior_PD, has_intercept, link, prior_dist, prior_dist_for_intercept
#include /data/data_glm.stan
  // declares has_weights, weights, has_offset, offset_
#include /data/weights_offset.stan
  int<lower=6,upper=7> family; // 6 poisson, 7 neg-binom, (8 poisson with gamma noise at some point?)
  // declares prior_{mean, scale, df}, prior_{mean, scale, df}_for_intercept, prior_{mean, scale, df}_for_aux
#include /data/hyperparameters.stan
  // declares t, p[t], l[t], q, len_theta_L, shape, scale, {len_}concentration, {len_}regularization
#include /data/glmer_stuff.stan
  // declares num_not_zero, w, v, u
#include /data/glmer_stuff2.stan
}
transformed data{
  real poisson_max = pow(2.0, 30.0);
  int<lower=1> V[special_case ? t : 0, N] = make_V(N, special_case ? t : 0, v);
  
  int can_do_countlogglm = K != 0 &&  // remove K!=0 after rstan includes this Stan bugfix: https://github.com/stan-dev/math/issues/1398
                           link == 1 && prior_PD == 0 && 
                           dense_X == 1 && has_weights == 0 && t == 0; 
  matrix[can_do_countlogglm ? N : 0, can_do_countlogglm ? K + K_smooth : 0] XS;
  
  // defines hs, len_z_T, len_var_group, delta, pos
#include /tdata/tdata_glm.stan

  if (can_do_countlogglm) {
    XS = K_smooth > 0 ? append_col(X[1], S) : X[1];
  }
}
parameters {
  real<lower=(link == 1 ? negative_infinity() : 0.0)> gamma[has_intercept];
  // declares z_beta, global, local, z_b, z_T, rho, zeta, tau
#include /parameters/parameters_glm.stan
  real<lower=0> aux_unscaled[family > 6];
  vector<lower=0>[N] noise[family == 8]; // do not store this
}
transformed parameters {
  real aux = negative_infinity(); // be careful with this in the family = 6 case
  // defines beta, b, theta_L
#include /tparameters/tparameters_glm.stan

  if (family > 6 && (prior_dist_for_aux == 0 || prior_scale_for_aux <= 0))
    aux = aux_unscaled[1];
  else if (family > 6) {
    aux = prior_scale_for_aux * aux_unscaled[1];
    if (prior_dist_for_aux <= 2) // normal or student_t
      aux += prior_mean_for_aux;
  }
 
  if (t > 0) {
    if (special_case == 1) {
      int start = 1;
      theta_L = scale .* (family == 6 ? tau : tau * aux);
      if (t == 1) b = theta_L[1] * z_b;
      else for (i in 1:t) {
        int end = start + l[i] - 1;
        b[start:end] = theta_L[i] * z_b[start:end];
        start = end + 1;
      }
    }
    else {
      if (family == 6)
        theta_L = make_theta_L(len_theta_L, p, 1.0,
                               tau, scale, zeta, rho, z_T);
      else
        theta_L = make_theta_L(len_theta_L, p, aux,
                               tau, scale, zeta, rho, z_T);
      b = make_b(z_b, theta_L, p, l);
    }
  }
}
model {
  if (can_do_countlogglm) {
    vector[K + K_smooth] coeff = K_smooth > 0 ? append_row(beta, beta_smooth) : beta;
    if (family != 7) {
      if (has_offset) {
        target += poisson_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff);
      } else {
        target += poisson_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff);
      }
    } else {
      if (has_offset) {
        target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? offset_ + gamma[1] : offset_, coeff, aux);
      } else {
        target += neg_binomial_2_log_glm_lpmf(y | XS, has_intercept ? gamma[1] : 0.0, coeff, aux);
      }
    }
  } else if (prior_PD == 0) {
#include /model/make_eta.stan
    if (t > 0) {
#include /model/eta_add_Zb.stan
    }
    if (has_intercept == 1) {
      if (link == 1) eta += gamma[1];
      else eta += gamma[1] - min(eta);
    }
    else {
#include /model/eta_no_intercept.stan
    }
  
    if (family == 8) {
      if      (link == 1) eta += log(aux) + log(noise[1]);
      else if (link == 2) {
        eta *= aux;
        eta .*= noise[1];
      }
      else                eta += sqrt(aux) + sqrt(noise[1]);
    }
  
    // Log-likelihood
    if (has_weights == 0) {  // unweighted log-likelihoods
      if (family != 7) {
        if (link == 1) target += poisson_log_lpmf(y | eta);
        else target += poisson_lpmf(y | linkinv_count(eta, link));
      }
      else {
        if (link == 1) target += neg_binomial_2_log_lpmf(y | eta, aux);
        else target += neg_binomial_2_lpmf(y | linkinv_count(eta, link), aux);
      }
    }
    else if (family != 7)
      target += dot_product(weights, pw_pois(y, eta, link));
    else
      target += dot_product(weights, pw_nb(y, eta, aux, link));
  }
  
  // Log-prior for aux
  if (family > 6 && 
      prior_dist_for_aux > 0 && prior_scale_for_aux > 0) {
    real log_half = -0.693147180559945286;    
    if (prior_dist_for_aux == 1)
      target += normal_lpdf(aux_unscaled | 0, 1) - log_half;
    else if (prior_dist_for_aux == 2)
      target += student_t_lpdf(aux_unscaled | prior_df_for_aux, 0, 1) - log_half;
    else 
      target += exponential_lpdf(aux_unscaled | 1);
  }
  
#include /model/priors_glm.stan
  
  // Log-prior for noise
  if (family == 8) target += gamma_lpdf(noise[1] | aux, 1);
  
  if (t > 0) {
    real dummy = decov_lp(z_b, z_T, rho, zeta, tau, 
                          regularization, delta, shape, t, p);
  }
}
generated quantities {
  real mean_PPD = compute_mean_PPD ? 0 : negative_infinity();
  real alpha[has_intercept];
  
  if (has_intercept == 1) {
    if (dense_X) alpha[1] = gamma[1] - dot_product(xbar, beta);
    else alpha[1] = gamma[1];
  }
  
  if (compute_mean_PPD) {
    vector[N] nu;
#include /model/make_eta.stan
    if (t > 0) {
#include /model/eta_add_Zb.stan
    }
    if (has_intercept == 1) {
      if (link == 1) eta += gamma[1];
      else {
        real shift = min(eta);
        eta += gamma[1] - shift;
        alpha[1] -= shift;
      }
    }
    else {
#include /model/eta_no_intercept.stan
    }

    if (family == 8) {
      if      (link == 1) eta += log(aux) + log(noise[1]);
      else if (link == 2) {
        eta *= aux;
        eta .*= noise[1];
      }
      else                eta += sqrt(aux) + sqrt(noise[1]);
    }
    nu = linkinv_count(eta, link);
    if (family != 7) for (n in 1:N) {
        if (nu[n] < poisson_max) mean_PPD += poisson_rng(nu[n]);
        else mean_PPD += normal_rng(nu[n], sqrt(nu[n]));
    }
    else for (n in 1:N) {
        real gamma_temp;
        if (is_inf(aux)) gamma_temp = nu[n];
        else gamma_temp = gamma_rng(aux, aux / nu[n]);
        if (gamma_temp < poisson_max)
          mean_PPD += poisson_rng(gamma_temp);
        else mean_PPD += normal_rng(gamma_temp, sqrt(gamma_temp));
    }
    mean_PPD /= N;
  }
} 

Yeah, that happens in negative binomial RNGs sometimes. There is not a lot you can do about it right now except try with a different seed and hope it does not overflow.
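
In case it helps, a minimal sketch of the "different seed" retry (this assumes the original stan_gamm4 call did not fix a seed, so each kfold refit draws a new one from the worker's RNG state; the seed value here is arbitrary):

# Use reproducible parallel RNG streams for mclapply, then retry kfold
RNGkind("L'Ecuyer-CMRG")
set.seed(2020)  # hypothetical seed; change it and rerun if the RNG overflows again
kf3 = rstanarm::kfold(mod3, K = 5, cores = 12)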
