# Integrate_1d function has nan gradients

I am using the stan model

 functions {
real wt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
//w[t] = o*(1-c) * [(1-I) + (I*(1-d)) ]
real it_a = theta[1];
real it_b = theta[2];
real ot_a = theta[3];
real ot_b = theta[4];
real p_c = theta[5];
real p_d = theta[6];
// real T = theta[7];
real T = x_r[1];
real likelihood;
real I;
real i;
real o;

I = weibull_lcdf(T-t | it_a, it_b);
i = weibull_lpdf(T-t | it_a, it_b);
o = weibull_lpdf(t | ot_a, ot_b);
// print("t:",t," T:",T," I:",I," O:",o,"PC:",p_c);
likelihood = o + log_diff_exp(0.0,p_c);
likelihood = likelihood + log_sum_exp(log_diff_exp(0.0,I), i+log_diff_exp(0.0,p_d));
// print("like3:",likelihood);
return(likelihood);
}

real yt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
real it_a = theta[1];
real it_b = theta[2];
real ot_a = theta[3];
real ot_b = theta[4];
real p_c = theta[5];
real p_d = theta[6];
// real t_star = theta[7];
real t_star = x_r[1];
real likelihood;
real diag;

likelihood =  weibull_lpdf(t | ot_a, ot_b) + log_sum_exp(0,p_c);
diag = weibull_lpdf(t_star-t | it_a, it_b);
// print("diag:",diag," :: ",t_star-t, " :: ", it_a, ",", it_b);
likelihood = likelihood + diag + p_d;
// print("like3:",likelihood);
return(likelihood);
}

}

data {
int N_T_NR_T; // no response starting at target - not a glitch
int N_T_NR_F; // no response starting at nontarget - onset/no response + onset/identity/no response
int N_T_F_F;  // glitch
int N_T_T_F;  // glitch
int N_T_F_T;  // onset + identity + glitch

int N_F_NR_F;
int N_F_NR_N;
int N_F_N_F;
int N_F_N_N;
int C; // number of columns

matrix[N_T_NR_T,C] T_no_T ;
matrix[N_T_NR_F,C] T_no_F ;
matrix[N_T_F_F,C] T_F_to_F;
matrix[N_T_T_F,C] T_T_to_F;
matrix[N_T_F_T,C] T_F_to_T;

matrix[N_F_NR_F,C] F_no_F;
matrix[N_F_NR_N,C] F_no_N;
matrix[N_F_N_F,C]  F_N_to_F;
matrix[N_F_N_N,C]  F_N_to_N;
}

parameters {
// glitch response
real<lower=0,upper=10> gt_a;
real<lower=0,upper=200> gt_b;
real<lower=0, upper=1> p_e;

// change detection
real<lower=0,upper=10> ot_a;
real<lower=0,upper=200> ot_b;
real<lower=0, upper=1> p_c;

// identity detection
real<lower=0,upper=10> it_a;
real<lower=0,upper=200> it_b;
real<lower=0, upper=1> p_d;

// hyperparameters
// real<lower=0,upper=200> it_a_hyper;
// real<lower=0,upper=200> gt_a_hyper;
// real<lower=0,upper=200> ot_a_hyper;
// real<lower=0, upper=10> pc_a;
// real<lower=0, upper=10> pd_a;
// real<lower=0, upper=10> pe_a;
}

model {
real y;
real T;
real G;
real O;
real y_pdf;
real int_val;
real NO;
real ON;
real wt_prob;
real yt_prob;

// it_a ~ gamma(it_a_hyper, 1);
// it_b ~ gamma(1, 1);
// gt_a ~ gamma(gt_a_hyper, 1);
// gt_b ~ gamma(1, 1);
// ot_a ~ gamma(ot_a_hyper, 1);
// ot_b ~ gamma(1,1);

// p_c ~ beta(pc_a, 1);
// p_d ~ beta(pd_a, 1);
// p_e ~ beta(pd_a, 1);

// targets
for(i in 1:N_T_NR_T) {
T = T_no_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_diff_exp(0,G);
}

for(i in 1:N_T_NR_F) {

T = T_no_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
NO = log_diff_exp(0, weibull_lcdf(T | ot_a, ot_b));
int_val = integrate_1d(wt_integrand, 0.0, T, {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_no_T[i,1]}, {0}, (1e-2) );
// print("int_val_wt:", int_val);
wt_prob = log_sum_exp(NO, int_val);
target += log_diff_exp(0,G) + wt_prob;
}

for(i in 1:N_T_F_F) {
y = T_F_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}

for(i in 1:N_T_T_F) {
y = T_T_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}

for(i in 1:N_T_F_T) {
y = T_F_to_T[i,3];
T = T_F_to_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
ON = weibull_lpdf(y | ot_a, ot_b) + log(p_c);
int_val = integrate_1d(yt_integrand, 0.0, y-(1e-2), {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_F_to_T[i,3]}, {0}, (1e-2) );
// print("int_val_yt:", int_val);
yt_prob = log_sum_exp(NO, int_val);
y_pdf = log_sum_exp(weibull_lpdf(y | ot_a, ot_b)+p_c,  yt_prob);
target += log_diff_exp(0,G) + log_diff_exp(log_sum_exp( weibull_lpdf(y | gt_a, gt_b)+log(p_e), y_pdf), weibull_lpdf(y | gt_a, gt_b)+log(p_e)+y_pdf);
}

// foils
for(i in 1:N_F_NR_F) {
T = F_no_F[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_diff_exp(0,G);
}

for(i in 1:N_F_NR_N) {
T = F_no_N[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
O = weibull_lcdf(T | ot_a, ot_b);
// use the expected onset time log(ot_a/ot_b), because there is no response
target += log_diff_exp(0,G) + log_sum_exp(log_diff_exp(0,O), log(ot_a/ot_b)+log_diff_exp(0,log(p_c)) ) ;
}

for(i in 1:N_F_N_F) {
y = F_N_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}

for(i in 1:N_F_N_N) {
y = F_N_to_N[i,3];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_sum_exp(weibull_lpdf(y | gt_a, gt_b)+log(p_e), log_diff_exp(0,G)) + weibull_lpdf(y | ot_a, ot_b) + log(p_c);
}
}


which uses initial parameters

init_fun <- function() {
list(
it_a = 1,
it_b = 35,
ot_a = 1,
ot_b = 35,
gt_a = 1,
gt_b = 35,
p_c  = 0.8,
p_e  = 0.1,
p_d  = 0.7
) # list
}


However, the sampler is running into gradient issues.

SAMPLING FOR MODEL 'att_model' NOW (CHAIN 1).
Chain 1: Exception: gradient_of_f: The gradient of f is nan for parameter 0  (in 'model2e0a222debff_att_model' at line 137)

[1] "Error in sampler\$call_sampler(args_list[[i]]) : "
[2] "  Exception: gradient_of_f: The gradient of f is nan for parameter 0  (in 'model2e0a222debff_att_model' at line 137)"


I have spent considerable amount of time trying to figure the reason for a â€śnanâ€ť gradient and have fit a dead end.

I am hoping the community can give me guidelines on moving forward.

1 Like

I think that means that, if we call the first parameter \theta_1, that:

\frac{\partial d}{\partial \theta_1} \int_a^bf(\theta, x)dx is blowing up.

That could be cause the gradient mathematically doesnâ€™t exist, or it could be because something numeric is blowing up.

The first thing to do is verify the integral exists at the parameters youâ€™re specifying (maybe with Wolfram Alpha or something), then check the gradient exists, and then check to make sure the Stan code is implemented properly. Unfortunately thereâ€™s probably not an easy answer here :D. Just gotta take these things apart and investigate piece by piece.

The error happens here: https://github.com/stan-dev/math/blob/develop/stan/math/rev/arr/functor/integrate_1d.hpp#L46

[edit: fixed indexing of theta in equation]

Probably rounding errors piling up. Iâ€™d try using weibull_lccdf(...) instead of log_diff_exp(0, weibull_cdf(...)) and log1m_exp(...) instead of log_diff_exp(0, ...), maybe thatâ€™s more stable.

1 Like