Integrate_1d function has nan gradients

I am using the stan model

 functions {
  real wt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
    //w[t] = o*(1-c) * [(1-I) + (I*(1-d)) ]
    real it_a = theta[1];
    real it_b = theta[2];
    real ot_a = theta[3];
    real ot_b = theta[4];
    real p_c = theta[5];
    real p_d = theta[6];
    // real T = theta[7];
    real T = x_r[1];
    real likelihood;
    real I;
    real i;
    real o;

    I = weibull_lcdf(T-t | it_a, it_b);
    i = weibull_lpdf(T-t | it_a, it_b);
    o = weibull_lpdf(t | ot_a, ot_b);
    // print("t:",t," T:",T," I:",I," O:",o,"PC:",p_c); 
    likelihood = o + log_diff_exp(0.0,p_c);
    likelihood = likelihood + log_sum_exp(log_diff_exp(0.0,I), i+log_diff_exp(0.0,p_d));
    // print("like3:",likelihood);
    return(likelihood);
  }
  
  real yt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
    real it_a = theta[1];
    real it_b = theta[2];
    real ot_a = theta[3];
    real ot_b = theta[4];
    real p_c = theta[5];
    real p_d = theta[6];
    // real t_star = theta[7];
    real t_star = x_r[1];
    real likelihood;
    real diag;

    likelihood =  weibull_lpdf(t | ot_a, ot_b) + log_sum_exp(0,p_c);
    diag = weibull_lpdf(t_star-t | it_a, it_b);
    // print("diag:",diag," :: ",t_star-t, " :: ", it_a, ",", it_b);
    likelihood = likelihood + diag + p_d;
    // print("like3:",likelihood);
    return(likelihood);
  }
  
}

data {
  int N_T_NR_T; // no response starting at target - not a glitch
  int N_T_NR_F; // no response starting at nontarget - onset/no response + onset/identity/no response
  int N_T_F_F;  // glitch
  int N_T_T_F;  // glitch
  int N_T_F_T;  // onset + identity + glitch
  
  int N_F_NR_F;
  int N_F_NR_N;
  int N_F_N_F;
  int N_F_N_N;
  int C; // number of columns
  
  matrix[N_T_NR_T,C] T_no_T ;
  matrix[N_T_NR_F,C] T_no_F ;
  matrix[N_T_F_F,C] T_F_to_F;
  matrix[N_T_T_F,C] T_T_to_F;
  matrix[N_T_F_T,C] T_F_to_T;
  
  matrix[N_F_NR_F,C] F_no_F;
  matrix[N_F_NR_N,C] F_no_N;
  matrix[N_F_N_F,C]  F_N_to_F;
  matrix[N_F_N_N,C]  F_N_to_N;
}

parameters {
  // glitch response
  real<lower=0,upper=10> gt_a;
  real<lower=0,upper=200> gt_b;
  real<lower=0, upper=1> p_e;
  
  // change detection
  real<lower=0,upper=10> ot_a;
  real<lower=0,upper=200> ot_b;
  real<lower=0, upper=1> p_c;
  
  // identity detection
  real<lower=0,upper=10> it_a;
  real<lower=0,upper=200> it_b;
  real<lower=0, upper=1> p_d;
  
  // hyperparameters
  // real<lower=0,upper=200> it_a_hyper;
  // real<lower=0,upper=200> gt_a_hyper;
  // real<lower=0,upper=200> ot_a_hyper;
  // real<lower=0, upper=10> pc_a;
  // real<lower=0, upper=10> pd_a;
  // real<lower=0, upper=10> pe_a;
}


model {
  real y;
  real T;
  real G;
  real O;
  real y_pdf;
  real int_val;
  real NO;
  real ON;
  real wt_prob;
  real yt_prob;
  
  // it_a ~ gamma(it_a_hyper, 1); 
  // it_b ~ gamma(1, 1);
  // gt_a ~ gamma(gt_a_hyper, 1);
  // gt_b ~ gamma(1, 1);
  // ot_a ~ gamma(ot_a_hyper, 1); 
  // ot_b ~ gamma(1,1);
  
  // p_c ~ beta(pc_a, 1); 
  // p_d ~ beta(pd_a, 1);
  // p_e ~ beta(pd_a, 1); 
  

  // targets
  for(i in 1:N_T_NR_T) {
    T = T_no_T[i,1];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    target += log_diff_exp(0,G);
  }
  
  for(i in 1:N_T_NR_F) {
    
    T = T_no_T[i,1];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    NO = log_diff_exp(0, weibull_lcdf(T | ot_a, ot_b));
    int_val = integrate_1d(wt_integrand, 0.0, T, {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_no_T[i,1]}, {0}, (1e-2) );
    // print("int_val_wt:", int_val);
    wt_prob = log_sum_exp(NO, int_val);
    target += log_diff_exp(0,G) + wt_prob;
  }
  
  for(i in 1:N_T_F_F) {
    y = T_F_to_F[i,3];
    target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
  }
  
  for(i in 1:N_T_T_F) {
    y = T_T_to_F[i,3];
    target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
  }
  
  for(i in 1:N_T_F_T) {
    y = T_F_to_T[i,3];
    T = T_F_to_T[i,1];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    ON = weibull_lpdf(y | ot_a, ot_b) + log(p_c);
    int_val = integrate_1d(yt_integrand, 0.0, y-(1e-2), {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_F_to_T[i,3]}, {0}, (1e-2) );
    // print("int_val_yt:", int_val);
    yt_prob = log_sum_exp(NO, int_val);
    y_pdf = log_sum_exp(weibull_lpdf(y | ot_a, ot_b)+p_c,  yt_prob);
    target += log_diff_exp(0,G) + log_diff_exp(log_sum_exp( weibull_lpdf(y | gt_a, gt_b)+log(p_e), y_pdf), weibull_lpdf(y | gt_a, gt_b)+log(p_e)+y_pdf); 
  }
  
  // foils
  for(i in 1:N_F_NR_F) {
    T = F_no_F[i,1];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    target += log_diff_exp(0,G);
  }
  
  for(i in 1:N_F_NR_N) {
    T = F_no_N[i,1];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    O = weibull_lcdf(T | ot_a, ot_b);
    // use the expected onset time log(ot_a/ot_b), because there is no response
    target += log_diff_exp(0,G) + log_sum_exp(log_diff_exp(0,O), log(ot_a/ot_b)+log_diff_exp(0,log(p_c)) ) ;
  }
  
  for(i in 1:N_F_N_F) {
    y = F_N_to_F[i,3];
    target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
  }
  
  for(i in 1:N_F_N_N) {
    y = F_N_to_N[i,3];
    G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
    target += log_sum_exp(weibull_lpdf(y | gt_a, gt_b)+log(p_e), log_diff_exp(0,G)) + weibull_lpdf(y | ot_a, ot_b) + log(p_c);
  } 
}

which uses initial parameters

init_fun <- function() {
    list(
      it_a = 1,
      it_b = 35,
      ot_a = 1, 
      ot_b = 35,
      gt_a = 1,
      gt_b = 35,
      p_c  = 0.8,
      p_e  = 0.1,
      p_d  = 0.7
    ) # list
  }

However, the sampler is running into gradient issues.

SAMPLING FOR MODEL 'att_model' NOW (CHAIN 1).
Chain 1: Exception: gradient_of_f: The gradient of f is nan for parameter 0  (in 'model2e0a222debff_att_model' at line 137)

[1] "Error in sampler$call_sampler(args_list[[i]]) : "                                                                    
[2] "  Exception: gradient_of_f: The gradient of f is nan for parameter 0  (in 'model2e0a222debff_att_model' at line 137)"

I have spent considerable amount of time trying to figure the reason for a “nan” gradient and have fit a dead end.

I am hoping the community can give me guidelines on moving forward.

1 Like

I think that means that, if we call the first parameter \theta_1, that:

\frac{\partial d}{\partial \theta_1} \int_a^bf(\theta, x)dx is blowing up.

That could be cause the gradient mathematically doesn’t exist, or it could be because something numeric is blowing up.

The first thing to do is verify the integral exists at the parameters you’re specifying (maybe with Wolfram Alpha or something), then check the gradient exists, and then check to make sure the Stan code is implemented properly. Unfortunately there’s probably not an easy answer here :D. Just gotta take these things apart and investigate piece by piece.

The error happens here: https://github.com/stan-dev/math/blob/develop/stan/math/rev/arr/functor/integrate_1d.hpp#L46

[edit: fixed indexing of theta in equation]

Probably rounding errors piling up. I’d try using weibull_lccdf(...) instead of log_diff_exp(0, weibull_cdf(...)) and log1m_exp(...) instead of log_diff_exp(0, ...), maybe that’s more stable.

1 Like