I am using the stan model
functions {
real wt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
//w[t] = o*(1-c) * [(1-I) + (I*(1-d)) ]
real it_a = theta[1];
real it_b = theta[2];
real ot_a = theta[3];
real ot_b = theta[4];
real p_c = theta[5];
real p_d = theta[6];
// real T = theta[7];
real T = x_r[1];
real likelihood;
real I;
real i;
real o;
I = weibull_lcdf(T-t | it_a, it_b);
i = weibull_lpdf(T-t | it_a, it_b);
o = weibull_lpdf(t | ot_a, ot_b);
// print("t:",t," T:",T," I:",I," O:",o,"PC:",p_c);
likelihood = o + log_diff_exp(0.0,p_c);
likelihood = likelihood + log_sum_exp(log_diff_exp(0.0,I), i+log_diff_exp(0.0,p_d));
// print("like3:",likelihood);
return(likelihood);
}
real yt_integrand(real t, real tc, real[] theta, real[] x_r, int[] x_i) {
real it_a = theta[1];
real it_b = theta[2];
real ot_a = theta[3];
real ot_b = theta[4];
real p_c = theta[5];
real p_d = theta[6];
// real t_star = theta[7];
real t_star = x_r[1];
real likelihood;
real diag;
likelihood = weibull_lpdf(t | ot_a, ot_b) + log_sum_exp(0,p_c);
diag = weibull_lpdf(t_star-t | it_a, it_b);
// print("diag:",diag," :: ",t_star-t, " :: ", it_a, ",", it_b);
likelihood = likelihood + diag + p_d;
// print("like3:",likelihood);
return(likelihood);
}
}
data {
int N_T_NR_T; // no response starting at target - not a glitch
int N_T_NR_F; // no response starting at nontarget - onset/no response + onset/identity/no response
int N_T_F_F; // glitch
int N_T_T_F; // glitch
int N_T_F_T; // onset + identity + glitch
int N_F_NR_F;
int N_F_NR_N;
int N_F_N_F;
int N_F_N_N;
int C; // number of columns
matrix[N_T_NR_T,C] T_no_T ;
matrix[N_T_NR_F,C] T_no_F ;
matrix[N_T_F_F,C] T_F_to_F;
matrix[N_T_T_F,C] T_T_to_F;
matrix[N_T_F_T,C] T_F_to_T;
matrix[N_F_NR_F,C] F_no_F;
matrix[N_F_NR_N,C] F_no_N;
matrix[N_F_N_F,C] F_N_to_F;
matrix[N_F_N_N,C] F_N_to_N;
}
parameters {
// glitch response
real<lower=0,upper=10> gt_a;
real<lower=0,upper=200> gt_b;
real<lower=0, upper=1> p_e;
// change detection
real<lower=0,upper=10> ot_a;
real<lower=0,upper=200> ot_b;
real<lower=0, upper=1> p_c;
// identity detection
real<lower=0,upper=10> it_a;
real<lower=0,upper=200> it_b;
real<lower=0, upper=1> p_d;
// hyperparameters
// real<lower=0,upper=200> it_a_hyper;
// real<lower=0,upper=200> gt_a_hyper;
// real<lower=0,upper=200> ot_a_hyper;
// real<lower=0, upper=10> pc_a;
// real<lower=0, upper=10> pd_a;
// real<lower=0, upper=10> pe_a;
}
model {
real y;
real T;
real G;
real O;
real y_pdf;
real int_val;
real NO;
real ON;
real wt_prob;
real yt_prob;
// it_a ~ gamma(it_a_hyper, 1);
// it_b ~ gamma(1, 1);
// gt_a ~ gamma(gt_a_hyper, 1);
// gt_b ~ gamma(1, 1);
// ot_a ~ gamma(ot_a_hyper, 1);
// ot_b ~ gamma(1,1);
// p_c ~ beta(pc_a, 1);
// p_d ~ beta(pd_a, 1);
// p_e ~ beta(pd_a, 1);
// targets
for(i in 1:N_T_NR_T) {
T = T_no_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_diff_exp(0,G);
}
for(i in 1:N_T_NR_F) {
T = T_no_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
NO = log_diff_exp(0, weibull_lcdf(T | ot_a, ot_b));
int_val = integrate_1d(wt_integrand, 0.0, T, {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_no_T[i,1]}, {0}, (1e-2) );
// print("int_val_wt:", int_val);
wt_prob = log_sum_exp(NO, int_val);
target += log_diff_exp(0,G) + wt_prob;
}
for(i in 1:N_T_F_F) {
y = T_F_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}
for(i in 1:N_T_T_F) {
y = T_T_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}
for(i in 1:N_T_F_T) {
y = T_F_to_T[i,3];
T = T_F_to_T[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
ON = weibull_lpdf(y | ot_a, ot_b) + log(p_c);
int_val = integrate_1d(yt_integrand, 0.0, y-(1e-2), {it_a,it_b,ot_a,ot_b,log(p_c),log(p_d)}, {T_F_to_T[i,3]}, {0}, (1e-2) );
// print("int_val_yt:", int_val);
yt_prob = log_sum_exp(NO, int_val);
y_pdf = log_sum_exp(weibull_lpdf(y | ot_a, ot_b)+p_c, yt_prob);
target += log_diff_exp(0,G) + log_diff_exp(log_sum_exp( weibull_lpdf(y | gt_a, gt_b)+log(p_e), y_pdf), weibull_lpdf(y | gt_a, gt_b)+log(p_e)+y_pdf);
}
// foils
for(i in 1:N_F_NR_F) {
T = F_no_F[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_diff_exp(0,G);
}
for(i in 1:N_F_NR_N) {
T = F_no_N[i,1];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
O = weibull_lcdf(T | ot_a, ot_b);
// use the expected onset time log(ot_a/ot_b), because there is no response
target += log_diff_exp(0,G) + log_sum_exp(log_diff_exp(0,O), log(ot_a/ot_b)+log_diff_exp(0,log(p_c)) ) ;
}
for(i in 1:N_F_N_F) {
y = F_N_to_F[i,3];
target += weibull_lpdf(y | gt_a, gt_b) + log(p_e);
}
for(i in 1:N_F_N_N) {
y = F_N_to_N[i,3];
G = weibull_lcdf(T | gt_a, gt_b) + log(p_e);
target += log_sum_exp(weibull_lpdf(y | gt_a, gt_b)+log(p_e), log_diff_exp(0,G)) + weibull_lpdf(y | ot_a, ot_b) + log(p_c);
}
}
which uses initial parameters
init_fun <- function() {
list(
it_a = 1,
it_b = 35,
ot_a = 1,
ot_b = 35,
gt_a = 1,
gt_b = 35,
p_c = 0.8,
p_e = 0.1,
p_d = 0.7
) # list
}
However, the sampler is running into gradient issues.
SAMPLING FOR MODEL 'att_model' NOW (CHAIN 1).
Chain 1: Exception: gradient_of_f: The gradient of f is nan for parameter 0 (in 'model2e0a222debff_att_model' at line 137)
[1] "Error in sampler$call_sampler(args_list[[i]]) : "
[2] " Exception: gradient_of_f: The gradient of f is nan for parameter 0 (in 'model2e0a222debff_att_model' at line 137)"
I have spent considerable amount of time trying to figure the reason for a “nan” gradient and have fit a dead end.
I am hoping the community can give me guidelines on moving forward.