Hi all,
I have been working with a system of ODEs with random effects, which ends up in divergence. (Rhats beyond 1).
The things I have tried and did not work:
-
Different priors (vague) for the parameters.
-
Different initial values.
-
Instead of working with integrate_ode_rk45 switch to integrate_ode_bdf
-
Adding the likelihood outside of the loops
-
Distinguishing the times = 0 (1st and 2nd code)
-
Defining the system numerically (3rd code)
All these codes end up with a message as: Location parameter is -inf, or nan, or inf. The indicated line 104 always points out the likelihood or ode_solver. However, I do not seem to find what is the underlying problem.
functions {
real[] sldecay(real t, real[] y, real[] theta, real[] x_r, int[] x_i) {
real dydt[3];
dydt[1] = -theta[1]*y[1];
dydt[2] = -theta[2]*y[2];
dydt[3] = theta[1]*y[1] + theta[2]*y[2] - theta[3]*y[3];
return dydt;
}
}
data {
int<lower=1> N;
int<lower=1> nsubjects;
int<lower=1> subj_ids[N];
real <lower=0> y[N];
int <lower=1> ntimes;
real ts[ntimes];
real t0;
}
parameters {
real <lower =0> logps;
real <lower =0> logpl;
real <lower =0> logmuab;
//real sigmalogmuab;
real <lower =0, upper = 19500> logAB0;
real <lower =0> sigmalogAB0;
real <lower =0> logps0;
real <lower =0> logpl0;
real <lower=0> sigma;
vector[nsubjects] rAB0;
}
model {
real inits[3];
real mu[N, 3];
real theta[3];
real new_mu[N,3];
//transformed parameters
real ps = logps*0.001;
real pl = logpl*0.001;
real muab = logmuab*0.001;
//real sigma_muab = exp(sigmalogmuab);
real AB0 = logAB0*0.001;
real sigma_AB0 = sigmalogAB0*0.001;
real ps0 = logps0*0.001;
real pl0 = logpl0*0.001;
real sigmap = sigma*0.001;
// defining the correlation
//vector[2] mean_vec;
//matrix[2,2] Sigma;
//mean_vec[1] = -pow(sigma_AB0, 2.0)/2;
//mean_vec[2] = -pow(sigma_muab, 2.0)/2;
//Sigma[1,1] = pow(sigma_AB0, 2.0);
//Sigma[2,2] = pow(sigma_muab, 2.0);
//Sigma[1,2] = 0;
//Sigma[2,1] = 0;
//prior distributions
logps ~ normal(0, 1);
logpl ~ normal(0, 1);
logmuab ~ normal(0, 1);
//sigmalogmuab ~ exponential(1.0);
logAB0 ~ normal(36, 0.5);
sigmalogAB0 ~ exponential(0.1);
logps0 ~ normal(0, 0.1);
logpl0 ~ normal(0, 0.1);
sigma ~ exponential(0.1);
//rho_rhobit ~ uniform(-10, 10);
//defining the parameters
theta[1] = ps;
theta[2] = pl;
theta[3] = muab;
// defining the distribution for the random effects
for (subj_id in 1:nsubjects) {
rAB0[subj_id] ~ normal(-sigmalogAB0/2, sigmalogAB0);
//r[subj_id] ~ multi_normal_cholesky(mean_vec, Sigma);
//rAB0[subj_id] = r[subj_id,1];
//rmuab[subj_id] = r[subj_id,2];
}
for (obs_id in 1:N){
inits[3] = AB0 + exp(rAB0[subj_ids[obs_id]]);
inits[1] = ps0;
inits[2] = pl0;
//defining ode_solver
mu[1:obs_id, ] = integrate_ode_bdf(sldecay, inits, t0, ts[1:obs_id], theta, rep_array(0.0,0), rep_array(0,0),
1e-6, 1e-5, 1e3);
new_mu[obs_id,1] = mu[obs_id,1];
new_mu[obs_id,2] = mu[obs_id,2];
new_mu[obs_id,3] = log(mu[obs_id,3]) - sigma/2;
//likelihood function for AB levels
y[obs_id] ~ lognormal(new_mu[obs_id,3], sigma);
}
}
The second code distinguishes between times = 0 and different than 0, but ends up with the same problem:
functions {
real[] sldecay(real t, real[] y, real[] theta, real[] x_r, int[] x_i) {
real dydt[3];
dydt[1] = -theta[1]*y[1];
dydt[2] = -theta[2]*y[2];
dydt[3] = theta[1]*y[1] + theta[2]*y[2] - theta[3]*y[3];
return dydt;
}
}
data {
int<lower=1> N;
int<lower=1> nsubjects;
int<lower=1> subj_ids[N];
real <lower=0> y[N];
int <lower=1> ntimes;
real ts[ntimes];
real t0;
}
parameters {
real <lower = 0> logps;
real <lower = 0> logpl;
real <lower = 0> logmuab;
//real <lower = 0> sigmalogmuab;
real <lower = 0> logAB0;
real <lower = 0> sigmalogAB0;
real <lower = 0> logps0;
real <lower = 0> logpl0;
real <lower=0> sigma;
vector[nsubjects] rAB0;
//real rho_rhobit;
}
model{
real mu[1, 3];
real inits[3];
real theta[3];
real new_mu[ntimes,3];
//vector[nsubjects] rmuab;
real eval_time[1];
// transformed parameters
real ps = logps*0.001;
real pl = logpl*0.001;
real muab = logmuab*0.001;
//real sigma_muab = exp(sigmalogmuab);
real sigma_AB0 = sigmalogAB0*0.001;
real ps0 = logps0*0.001;
real pl0 = logpl0*0.001;
real sigmap = sigma*0.001;
// defining the correlation
//vector[2] mean_vec;
//matrix[2,2] Sigma;
//real rho = (exp(rho_rhobit) - 1)/(exp(rho_rhobit) + 1);
//prior distributions
logps ~ normal(0, 1);
logpl ~ normal(0, 1);
logmuab ~ normal(0, 1);
//sigmalogmuab ~ exponential(1.0);
logAB0 ~ normal(36, 0.5);
sigmalogAB0 ~ exponential(0.1);
logps0 ~ normal(0, 0.1);
logpl0 ~ normal(0, 0.1);
sigma ~ exponential(0.1);
//rho_rhobit ~ uniform(-10, 10);
//cov matrix
//mean_vec[1] = -sigmalogAB0/2;
//mean_vec[2] = -sigmalogmuab/2;
//Sigma[1,1] = sigmalogAB0;
//Sigma[2,2] = sigmalogmuab;
//Sigma[1,2] = 0;
//Sigma[2,1] = 0;
//defining the parameters
theta[1] = ps;
theta[2] = pl;
theta[3] = muab;
for (j in 1:nsubjects) {
//r[j] ~ multi_normal_cholesky(mean_vec, Sigma);
//rAB0[j] = r[j,1];
rAB0[j] ~ normal(-sigmalogAB0/2, sigmalogAB0);
//rmuab[j] = r[j,2];
}
for (i in 1:N){
//defining the initial conditions
inits[3] = logAB0 + exp(rAB0[subj_ids[i]]);
inits[1] = inits[3] + ps0;
inits[2] = inits[3] + pl0;
// ode solver
eval_time[1] = ts[i];
if (eval_time[1] == 0){
new_mu[i,1] = inits[1];
new_mu[i,2] = inits[2];
new_mu[i,3] = log(inits[3]) - sigmap/2;}
else{
mu = integrate_ode_bdf(sldecay, inits, t0, eval_time, theta, rep_array(0.0,0), rep_array(0,0),
1e-6, 1e-5, 1e3);
new_mu[i,1] = mu[1,1];
new_mu[i,2] = mu[1,2];
new_mu[i,3] = log(mu[1,3]) - sigmap/2;}
y[i] ~ lognormal(new_mu[i, 3], sigmap);
}
}
Finally the third code:
functions {
real[] analytic_int(real[] inits, real t0, real ts, real[] theta) {
real y[3];
real dt = ts - t0;
real C;
y[1] = inits[1]*exp(-theta[1]*dt);
y[2] = inits[2]*exp(-theta[2]*dt);
C = inits[3] + inits[1]*theta[1]/(theta[1]-theta[3]) + inits[2]*theta[2]/(theta[2]-theta[3]);
y[3] = theta[1]*y[1]/(theta[3] - theta[1]) + theta[2]*y[2]/(theta[3] - theta[2]) + C*exp(-theta[3]*dt);
return y;
}
}
data {
int<lower=1> N;
int<lower=1> nsubjects;
int<lower=1> subj_ids[N];
real <lower=0> y[N];
int <lower=1> ntimes;
//int <lower=1> nobs[nsubjects];
real ts[ntimes];
real t0;
}
transformed data {
real x_r[0];
int x_i[0];
}
parameters {
real <lower = 0> logps;
real <lower = 0> logpl;
real <lower = 0> logmuab;
//real <lower = 0> sigmalogmuab;
real <lower = 0> logAB0;
real <lower = 0> sigmalogAB0;
real <lower = 0> logps0;
real <lower = 0> logpl0;
real <lower=0> sigma;
vector[nsubjects] rAB0;
//real rho_rhobit;
}
model{
real mu[ntimes, 3];
real inits[3];
real theta[3];
real new_mu[ntimes, 3];
real eval_time[ntimes];
// transformed parameters
//real ps = exp(logps);
//real pl = exp(logpl);
//real muab = exp(logmuab);
//real sigma_muab = exp(sigmalogmuab);
//real AB0 = exp(logAB0);
//real sigma_AB0 = exp(sigmalogAB0);
//real ps0 = exp(logps0);
//real pl0 = exp(logpl0);
//real sigmap = exp(sigma);
// defining the correlation
//vector[2] mean_vec;
//matrix[2,2] Sigma;
//real rho = (exp(rho_rhobit) - 1)/(exp(rho_rhobit) + 1);
//prior distributions
logps ~ normal(0, 1);
logpl ~ normal(0, 1);
logmuab ~ normal(0, 1);
//sigmalogmuab ~ exponential(0.1);
logAB0 ~ normal(36, 0.5);
sigmalogAB0 ~ exponential(0.1);
logps0 ~ normal(0, 1);
logpl0 ~ normal(0, 1);
sigma ~ exponential(0.1);
//rho_rhobit ~ uniform(-10, 10);
//cov matrix
//mean_vec[1] = -sigmalogAB0/2;
//mean_vec[2] = -sigmalogmuab/2;
//Sigma[1,1] = sigmalogAB0;
//Sigma[2,2] = sigmalogmuab;
//Sigma[1,2] = 0;
//Sigma[2,1] = 0;
theta[1] = logps;
theta[2] = logpl;
theta[3] = logmuab;
for (j in 1:nsubjects) {
//r[j] ~ multi_normal_cholesky(mean_vec, Sigma);
//rAB0[j] = r[j,1];
//rmuab[j] = r[j,2];
rAB0[j] ~ normal(-sigmalogAB0/2, sigmalogAB0);
}
for (i in 1:N){
//defining the initial conditions
inits[3] = logAB0 + exp(rAB0[subj_ids[i]]);
inits[1] = inits[3] + logps0;
inits[2] = inits[3] + logpl0;
//defining the parameter
// ode solver
eval_time[i] = ts[i];
if (eval_time[i] == 0){
new_mu[i,1] = inits[1];
new_mu[i,2] = inits[2];
new_mu[i,3] = log(inits[3]) - sigma/2;}
else{
mu[i] = analytic_int(inits, t0, eval_time[i], theta);
new_mu[i,1] = mu[i,1];
new_mu[i,2] = mu[i,2];
new_mu[i,3] = log(mu[i,3]) - sigma/2;}}
//likelihood
y ~ lognormal(new_mu[, 3], sigma);
}