MCMC chain did not coverge in fitting ITCH model

I am trying fitting intertemporal choice heuristic (ITCH) model from Ericson, K. M., et al. (2015). “Money earlier or later? Simple heuristics explain intertemporal choices better than delay discounting does.” Psychol Sci 26(6): 826-833. to my sample data, but the MCMC chain does not converge. I have added the iteration number into 11000, but some para’s r hat is still large than 1.1. Is something wrong with my code? Thank you very much.

data{
  int<lower=1> n_s; // number of subjects
  int<lower=1> nTrials; // number of trials
  int<lower=0, upper=2> choice[n_s, nTrials]; // 0 for ss choice, 1 for ll choice
  real<lower=0> ss_money[n_s, nTrials];
  real<lower=0> ss_time[n_s, nTrials];
  real<lower=1> ll_money[n_s, nTrials];
  real<lower=1> ll_time[n_s, nTrials];
 
} // end data 


parameters{
// group raw parameters
  // mean
  vector [5] mean_raw;  
  // SD
  vector <lower=0> [5] sd_raw;
// subject raw parameters
  vector[n_s] beta0_raw;
  vector[n_s] beta1_raw;
  vector[n_s] beta2_raw;
  vector[n_s] beta3_raw;
  vector[n_s] tau_raw;
}

transformed parameters{
  // real subject parameters
  vector [n_s] beta0;
  vector<lower=0>[n_s] beta1;
  vector<lower=0>[n_s] beta2;
  vector [n_s] beta3;
  vector <lower=0> [n_s] tau;
  // matt trick
  beta0=mean_raw[1]+sd_raw[1]*beta0_raw;
  beta1=exp(mean_raw[2]+sd_raw[2]*beta1_raw);
  beta2=exp(mean_raw[3]+sd_raw[3]*beta2_raw);
  beta3=-exp(mean_raw[4]+sd_raw[4]*beta3_raw);
  tau=exp(mean_raw[5]+sd_raw[5]*tau_raw);
}

model{
  
  real utility;
  
  mean_raw ~ normal(0,1);
  sd_raw ~ cauchy(0,3);
  
  beta0 ~ normal(0,1);
  beta1~normal(0,1);
  beta2~normal(0,1);
  beta3~normal(0,1);
  tau~normal(0,1);
  
  for (i in 1:n_s){
    for(t in 1:nTrials){
      utility = beta0[i]+beta1[i]*(ll_money[i,t]-ss_money[i,t])+beta2[i]*(2*(ll_money[i,t]-ss_money[i,t])/(ll_money[i,t]+ss_money[i,t]))+beta3[i]*(ll_time[i,t]-ss_time[i,t]);
      choice[i,t] ~ bernoulli_logit(tau[i]*utility);
    }
  }
  
}

generated quantities {
   real utility;
   // log likelihood and generated predictions   
   real log_lik[n_s];
   real y_pred[n_s,nTrials];
   
   // fitted parameters
   real beta0_fit; 
   real beta1_fit;
   real beta2_fit;
   real beta3_fit;
   real tau_fit;
   
   
   beta0_fit=mean_raw[1];
   beta1_fit=exp(mean_raw[2]);
   beta2_fit=exp(mean_raw[3]);
   beta3_fit=-exp(mean_raw[4]);
   tau_fit=exp(mean_raw[5]);
   
   
   for (i in 1:n_s){
     log_lik[i]=0;
     for(t in 1:nTrials){
        utility = beta0[i]+beta1[i]*(ll_money[i,t]-ss_money[i,t])+beta2[i]*(2*(ll_money[i,t]-ss_money[i,t])/(ll_money[i,t]+ss_money[i,t]))+beta3[i]*(ll_time[i,t]-ss_time[i,t]);
        log_lik[i] += bernoulli_logit_lpmf(choice[i, t] | tau[i]*utility);
        y_pred[i, t] =bernoulli_rng(inv_logit(tau[i] *utility)) ;
     } 
    }
 
}
1 Like

There are a few issues here.

Firstly, assuming this is the correct model:
image

You’re exponentiating the betas before you pass them to the likelihood function:

  beta1=exp(mean_raw[2]+sd_raw[2]*beta1_raw);
  beta2=exp(mean_raw[3]+sd_raw[3]*beta2_raw);
  beta3=-exp(mean_raw[4]+sd_raw[4]*beta3_raw);

Is that intentional? The model above doesn’t appear to call for it.

I might be looking at the wrong model though, since the construction of your linear term and the above are not the same.

Additionally, your construction of the non-centered parameterisation (the ‘matt trick’) is a little off, you need to have the normal(0,1) priors on the beta_raw & tau_raw parameters, not the betas & tau themselves (since those are constructed after the fact in the transformed parameters). Additionally, you can use the std_normal() prior instead. So your priors should like:

  beta0_raw ~ std_normal();
  beta1_raw ~ std_normal();
  beta2_raw ~ std_normal();
  beta3_raw ~ std_normal();
  tau_raw ~ std_normal();

If the model pictured above is what you were trying to fit, here is how I would specify it in Stan:

parameters{
// group raw parameters
  // mean
  vector[5] mean_raw;  
  // SD
  vector <lower=0> [5] sd_raw;
// subject raw parameters
  vector[n_s] beta_raw[5];
}

transformed parameters{
  // real subject parameters
  vector[n_s] beta[5];

  for(i in 1:5) {
    beta[i] = mean_raw[i] + beta_raw[i] * sd_raw[i];
  }
}

model{
  mean_raw ~ std_normal();
  sd_raw ~ cauchy(0,3);
  
  for(i in 1:5) {
    beta_raw[i] ~ std_normal();
  }
  
  for (i in 1:n_s){
    for(t in 1:nTrials){
      real utility;
      real x_star = (ll_money[i,t] + ss_money[i,t]) / 2;
      real t_star = (ll_time[i,t] + ss_time[i,t]) / 2;

      utility = beta[1][i] + 
                beta[2][i] * (ll_money[i,t] - ss_money[i,t]) +
                beta[3][i] * ((ll_money[i,t] - ss_money[i,t]) / x_star) +
                beta[4][i] * (ll_time[i,t] - ss_time[i,t]) +
                beta[5][i] * ((ll_time[i,t] - ss_time[i,t]) / t_star);
      choice[i,t] ~ bernoulli_logit(utility);
    }
  }
  
}

Actually, you could take this one step further and vectorise the inner loop over nTrials, so that you’re only looping over n_s:

data{
  int<lower=1> n_s; // number of subjects
  int<lower=1> nTrials; // number of trials
  int<lower=0, upper=2> choice[n_s, nTrials]; // 0 for ss choice, 1 for ll choice
  vector<lower=0>[nTrials] ss_money[n_s];
  vector<lower=0>[nTrials] ss_time[n_s];
  vector<lower=1>[nTrials] ll_money[n_s];
  vector<lower=1>[nTrials] ll_time[n_s];
 
} // end data 

parameters{
// group raw parameters
  // mean
  vector[5] mean_raw;  
  // SD
  vector <lower=0> [5] sd_raw;
// subject raw parameters
  vector[n_s] beta_raw[5];
}

transformed parameters{
  // real subject parameters
  vector[n_s] beta[5];

  for(i in 1:5) {
    beta[i] = mean_raw[i] + beta_raw[i] * sd_raw[i];
  }
}

model{
  mean_raw ~ std_normal();
  sd_raw ~ cauchy(0,3);
  
  for(i in 1:5) {
    beta_raw[i] ~ std_normal();
  }
  
  for (i in 1:n_s){
      vector[nTrials] utility;
      vector[nTrials] x_star = (ll_money[i] + ss_money[i]) / 2;
      vector[nTrials] t_star = (ll_time[i] + ss_time[i]) / 2;

      utility = beta[1][i] + 
                beta[2][i] * (ll_money[i] - ss_money[i]) +
                beta[3][i] * ((ll_money[i] - ss_money[i]) ./ x_star) +
                beta[4][i] * (ll_time[i] - ss_time[i]) +
                beta[5][i] * ((ll_time[i] - ss_time[i]) ./ t_star);
      choice[i] ~ bernoulli_logit(utility);
  }
  
}

Thank you so much !! I made terrible and stupid mistake. And thank you again for the suggestion of vectorising the inner loop, i really appreciate it. I used exp to transform the parameters only to make sure some of them are postive or negative. And is there any other way that can make sure the parameter is postive or negative?

To constrain the beta to be either positive or negative when using the non-centered parameterisation, you need to place the appropriate lower= or upper= on the respective _raw variable.

For your beta variables, if you want to constrain them to be positive, then you need to declare the respective beta_raw variable with the lower bound: <lower = -mean_raw/sd_raw> (more background on that derivation in this post). If you want them to be negative, then that constraint simply becomes <upper= -mean_raw/sd_raw>.

Based on your original code, I’ll assume that beta[1] (the intercept) can be either positive or negative, beta[2] and beta[3] should be positive, beta[4] should be negative, and beta[5] should be positive. But if that’s not the case, then you can just modify the code accordingly.

Here’s how that construction would look:

data{
  int<lower=1> n_s; // number of subjects
  int<lower=1> nTrials; // number of trials
  int<lower=0, upper=2> choice[n_s, nTrials]; // 0 for ss choice, 1 for ll choice
  vector<lower=0>[nTrials] ss_money[n_s];
  vector<lower=0>[nTrials] ss_time[n_s];
  vector<lower=1>[nTrials] ll_money[n_s];
  vector<lower=1>[nTrials] ll_time[n_s];
 
} // end data 

parameters{
// group raw parameters
  // mean
  vector[5] mean_raw;  
  // SD
  vector <lower=0> [5] sd_raw;
// subject raw parameters
  vector[n_s] beta_raw1;
  vector<lower = -mean_raw[2]/sd_raw[2]>[n_s] beta_raw2; // implies strictly positive beta[2]
  vector<lower = -mean_raw[3]/sd_raw[3]>[n_s] beta_raw3; // implies strictly positive beta[3]
  vector<upper = -mean_raw[4]/sd_raw[4]>[n_s] beta_raw4; // implies strictly negative beta[4]
  vector<lower = -mean_raw[5]/sd_raw[5]>[n_s] beta_raw5; // implies strictly positive beta[5]
}

transformed parameters{
  // real subject parameters
  vector[n_s] beta[5];

  beta[1] = mean_raw[1] + beta_raw1 * sd_raw[1];
  beta[2] = mean_raw[2] + beta_raw2 * sd_raw[2];
  beta[3] = mean_raw[3] + beta_raw3 * sd_raw[3];
  beta[4] = mean_raw[4] + beta_raw4 * sd_raw[4];
  beta[5] = mean_raw[5] + beta_raw5 * sd_raw[5];

}

model{
  mean_raw ~ std_normal();
  sd_raw ~ cauchy(0,3);

  beta_raw1 ~ std_normal();
  beta_raw2 ~ std_normal();
  beta_raw3 ~ std_normal();
  beta_raw4 ~ std_normal();
  beta_raw5 ~ std_normal();
  
  for (i in 1:n_s){
      vector[nTrials] utility;
      vector[nTrials] x_star = (ll_money[i] + ss_money[i]) / 2;
      vector[nTrials] t_star = (ll_time[i] + ss_time[i]) / 2;

      utility = beta[1][i] + 
                beta[2][i] * (ll_money[i] - ss_money[i]) +
                beta[3][i] * ((ll_money[i] - ss_money[i]) ./ x_star) +
                beta[4][i] * (ll_time[i] - ss_time[i]) +
                beta[5][i] * ((ll_time[i] - ss_time[i]) ./ t_star);
      choice[i] ~ bernoulli_logit(utility);
  }
  
}