Hello everyone, I am a beginner in computational modeling and I am trying to fit a Bayesian hierarchical reinforcement learning model (Pearce-Hall model) to my behavioral data. However, I am encountering some fitting issues, including divergent transitions, high R-hat values, and extremely low effective sample size (ESS). I would appreciate any advice or guidance on how to resolve these problems.
1. Overview of My Research Design:
In my experiment, participants complete a probabilistic reversal learning task: on each of 180 trials, they are presented with two stimuli and choose one. I am fitting these choice data with a Bayesian hierarchical Pearce-Hall reinforcement learning model.
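For reference, these are the trial-by-trial updates I am trying to implement (my own summary of the Pearce-Hall rule as coded below; starting both values at 0.5 and the learning rate at alpha is a choice I made, not something taken from a reference):

$$
\begin{aligned}
P(c_t = i) &= \frac{\exp(\tau V_t(i))}{\sum_{j=1}^{2} \exp(\tau V_t(j))} \\
\delta_t &= r_t - V_t(c_t) \\
k_t &= \gamma \, c \, \lvert \delta_t \rvert + (1 - \gamma) \, k_{t-1}, \qquad k_0 = \alpha \\
V_{t+1}(c_t) &= V_t(c_t) + k_t \, \delta_t
\end{aligned}
$$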
2. Solutions I've Tried (a sketch of my sampler settings follows this list):
- Increased the number of sampling iterations.
- Adjusted adapt_delta and the step size.
- Increased the number of warm-up iterations.
None of these resolved the issues.
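For concreteness, here is roughly how I have been running the sampler. This is a minimal CmdStanPy sketch; the file names and the exact values shown are illustrative placeholders, not my final settings.

# Sketch of the sampler settings I have tried (CmdStanPy interface).
# 'pearce_hall.stan' and 'data.json' are placeholder file names.
from cmdstanpy import CmdStanModel

model = CmdStanModel(stan_file='pearce_hall.stan')
fit = model.sample(
    data='data.json',      # JSON file matching the data block below
    chains=4,
    iter_warmup=2000,      # increased warm-up iterations
    iter_sampling=4000,    # increased sampling iterations
    adapt_delta=0.99,      # raised from the 0.8 default; shrinks the step size
    max_treedepth=12,
    seed=123,
)
print(fit.diagnose())      # reports divergences, R-hat, and ESS problems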
data {
  int<lower=1> nSubjects;
  int<lower=1> nTrials;
  array[nSubjects, nTrials] int<lower=1, upper=2> choice;    // chosen stimulus (1 or 2)
  array[nSubjects, nTrials] real<lower=0, upper=10> reward;  // outcome on each trial
}
transformed data {
  vector[2] initV = rep_vector(0.5, 2);  // initial stimulus values
  real initaPE = 0.5;                    // initial prediction-error value
}
parameters {
  // Hyper(group)-parameters
  vector[4] mu_pr;
  vector<lower=0>[4] sigma;
  // Subject-level raw parameters (for the Matt trick, i.e. non-centered parameterization)
  vector[nSubjects] alpha_pr;  // learning rate (initial associability)
  vector[nSubjects] gamma_pr;  // associability update rate
  vector[nSubjects] c_pr;      // associability scaling constant
  vector[nSubjects] tau_pr;    // inverse temperature
}
transformed parameters {
  // Subject-level parameters, mapped onto their supports via Phi_approx
  vector<lower=0, upper=1>[nSubjects] alpha;
  vector<lower=0, upper=1>[nSubjects] gamma;
  vector<lower=0, upper=1>[nSubjects] c;
  vector<lower=0, upper=20>[nSubjects] tau;
  for (i in 1:nSubjects) {
    alpha[i] = Phi_approx(mu_pr[1] + sigma[1] * alpha_pr[i]);
    gamma[i] = Phi_approx(mu_pr[2] + sigma[2] * gamma_pr[i]);
    c[i]     = Phi_approx(mu_pr[3] + sigma[3] * c_pr[i]);
    tau[i]   = Phi_approx(mu_pr[4] + sigma[4] * tau_pr[i]) * 20;
  }
}
model {
  // Hyperparameters
  mu_pr[1] ~ normal(0, 1);
  mu_pr[2] ~ normal(0, 1);
  mu_pr[3] ~ normal(0, 1);
  mu_pr[4] ~ normal(0, 5);
  sigma[1] ~ cauchy(0, 1);
  sigma[2] ~ cauchy(0, 1);
  sigma[3] ~ cauchy(0, 1);
  sigma[4] ~ cauchy(0, 5);
  // Individual (raw, non-centered) parameters
  alpha_pr ~ normal(0, 1);
  gamma_pr ~ normal(0, 1);
  c_pr ~ normal(0, 1);
  tau_pr ~ normal(0, 1);
  for (s in 1:nSubjects) {
    vector[2] v;   // value estimates
    real pe;       // prediction error
    real k;        // dynamic (Pearce-Hall) learning rate
    v = initV;     // initialize value estimates
    k = alpha[s];  // initial learning rate
    pe = initaPE;  // initial prediction error
    for (t in 1:nTrials) {
      choice[s, t] ~ categorical_logit(tau[s] * v);  // softmax choice rule
      // Compute prediction error
      pe = reward[s, t] - v[choice[s, t]];
      // Update learning rate using the Pearce-Hall rule
      k = gamma[s] * abs(pe) * c[s] + (1 - gamma[s]) * k;
      // Update value estimate of the chosen option
      v[choice[s, t]] = v[choice[s, t]] + k * pe;
    }
  }
}
generated quantities {
  array[nSubjects, nTrials] real log_lik;
  array[nSubjects, nTrials] int y_pred;
  y_pred = rep_array(-999, nSubjects, nTrials);  // initialize prediction array
  { // local block: saves time and space by not storing v, pe, k
    for (s in 1:nSubjects) {
      vector[2] v;  // value estimates
      real pe;      // prediction error
      real k;       // dynamic learning rate
      v = initV;
      k = alpha[s];
      for (t in 1:nTrials) {
        log_lik[s, t] = categorical_logit_lpmf(choice[s, t] | tau[s] * v);
        y_pred[s, t] = categorical_logit_rng(tau[s] * v);
        // Compute prediction error
        pe = reward[s, t] - v[choice[s, t]];
        // Update learning rate using the Pearce-Hall rule
        k = gamma[s] * abs(pe) * c[s] + (1 - gamma[s]) * k;
        // Update value estimate of the chosen option
        v[choice[s, t]] = v[choice[s, t]] + k * pe;
      }
    }
  }
}
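For completeness, this is the shape of the data I am passing to the model. It is a sketch with simulated placeholder values (nSubjects = 30 is just an example); my real data come from the task described above.

import numpy as np

# Placeholder data with the shapes and ranges the data block declares.
nSubjects, nTrials = 30, 180
rng = np.random.default_rng(1)
choice = rng.integers(1, 3, size=(nSubjects, nTrials))       # int, 1 or 2
reward = rng.uniform(0.0, 10.0, size=(nSubjects, nTrials))   # real in [0, 10]

stan_data = {
    'nSubjects': nSubjects,
    'nTrials': nTrials,
    'choice': choice,
    'reward': reward,
}
# stan_data can be passed directly to model.sample(data=stan_data), or written
# to JSON with cmdstanpy.write_stan_json('data.json', stan_data).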
Are there any issues with my model? And where can I find tutorials or guidance on Stan modeling?