Hello all, this is my first post and I would greatly appreciate any advice on what might be a simple problem. I apologize in advance for the long-windedness, but:

I am working on optimizing the code for a Stan model I’ve written for a hierarchical reinforcement learning (Q-learning) model – I fit the model using RStan. For a few weeks now, I have been trying to understand the correlation between the posterior marginal distributions of the population-level softmax choice temperature parameter (“tau” in the below Stan code – the upper bound in the code is set to 50, though in the pairs plot the upper bound was set to 20; I recurrently find some correlation of this general shape no matter the upper bound on tau…) and a temporal-discount parameter (“discount”). Also, in the version of this model that generated the pairs plot here, I had coded the second element of the “option_values” vector as the expected value of the second option (“option2”). Qlearn_pairs.pdf (969.1 KB)

From what I’ve found in the Stan User’s manual, a trick to get around the problematic/correlated posterior distributions is to fix one of the elements of the vector “option_values”, whose elements are mapped to a K=2 simplex via the softmax function, as equal to zero (as in the below code). **Could someone help me understand if setting this second element equal to zero is the proper form, or rather if the proper form was what I had initially, with “option_values” containing the learned expected values for both options on a trial?**

(Some additional information that may or may not be relevant): In the past, I have had no problem coding a version of this model that results in no divergences, medium-range n_eff values (<10k) for the number of iterations I specify (2000 warmup, 8000 iterations, for 24k total posterior samples), sufficient chain mixing/Rhat values (all close to 1 or < 1.05) for all estimated parameters, and the model finishes in around a few hours (see the RStudio code below for specifics).

Any and all comments/suggestions on the below code would be greatly appreciated, and please do let me know if additional information is required from my end.

```
data {
int<lower=1> N;                            // number of subjects
int<lower=1> Trials;                       // max trials per subject (array width)
int<lower=1, upper=Trials> Tsubj[N];       // trials actually completed per subject;
                                           // must not exceed Trials (it indexes the arrays below)
int<lower=1, upper=6> option1[N, Trials];  // stimulus index of the first option
int<lower=1, upper=6> option2[N, Trials];  // stimulus index of the second option
                                           // NOTE(review): option2 is never read in the
                                           // model/generated quantities as written — confirm intent
int<lower=-1, upper=2> choice[N, Trials];  // observed choice (1 or 2); presumably -1/0
                                           // mark missing trials — confirm against the data
int<lower=1, upper=6> decision[N, Trials]; // stimulus index of the chosen option (indexes ev rows)
real outcome[N, Trials];                   // reward feedback on each trial
}
transformed data {
// Initial expected values: one row of three zeros, copied into every
// stimulus row of ev at the start of each subject's trial loop.
row_vector[3] initV = rep_row_vector(0.0, 3);
}
parameters {
// Group-level (hyper)parameters: means and scales of the population
// distributions on the unconstrained probit scale.
// Index 1 = learnrate, 2 = discount, 3 = tau.
vector[3] mu_pr;
vector<lower=0>[3] sigma_pr;
// Subject-level raw deviates (non-centered parameterization); mapped to
// the constrained scale in the transformed parameters block.
vector[N] learnrate_pr;
vector[N] discount_pr;
vector[N] tau_pr;
}
transformed parameters {
// Subject-level parameters on their natural scales, via the non-centered
// parameterization: Phi_approx maps the probit-scale value into (0, 1);
// tau is then stretched to (0, 50). Vectorized over subjects — identical
// results to an explicit per-subject loop.
vector<lower=0, upper=1>[N] learnrate;
vector<lower=0, upper=1>[N] discount;
vector<lower=0, upper=50>[N] tau;
learnrate = Phi_approx(mu_pr[1] + sigma_pr[1] * learnrate_pr);
discount = Phi_approx(mu_pr[2] + sigma_pr[2] * discount_pr);
tau = Phi_approx(mu_pr[3] + sigma_pr[3] * tau_pr) * 50;
}
model {
// Hyperpriors on the group-level means and standard deviations of the
// probit-scale {learnrate, discount, tau} parameters
mu_pr ~ normal(0,1);
sigma_pr ~ normal(0,0.2);
// Standard-normal priors on the raw deviates (non-centered parameterization)
learnrate_pr ~ normal(0,1);
discount_pr ~ normal(0,1);
tau_pr ~ normal(0,1);
// subject loop and trial loop
for (i in 1:N) {
matrix[6,3] ev;   // expected values: 6 stimuli x 3 value columns
real PE;          // prediction error
vector[2] option_values = [ 0, 0 ]';
// reset all values to initV (zeros) at the start of each subject
for (idx in 1:6) { ev[idx] = initV; }
for (t in 1:(Tsubj[i])) {
// Softmax identifiability: the second element is pinned at 0, since
// softmax is invariant to adding a constant to all elements.
// NOTE(review): this is only equivalent to the two-value form
// [ev1*tau, ev2*tau]' if the intended input is the value DIFFERENCE
// (ev1 - ev2)*tau; as written, ev[option2[i, t], 1] is ignored
// entirely — confirm which model is intended.
option_values = [ ev[option1[i, t], 1] * tau[i], 0 ]';
// compute action probabilities
target += categorical_lpmf(choice[i, t] | softmax(option_values));
// Update the chosen stimulus' three value columns in order:
// columns 1-2 bootstrap toward the discounted next column,
// column 3 learns from the observed outcome.
for (e in 1:3) {
if (e < 3) {
PE = (discount[i]*ev[decision[i, t], (e+1)]) - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
} else {
PE = outcome[i, t] - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
}
}
}
}
}
generated quantities {
// Group-level parameter means mapped to the constrained scale
real<lower=0, upper=1> mu_learnrate;
real<lower=0, upper=1> mu_discount;
real<lower=0, upper=50> mu_tau;
// Per-trial log likelihood (for loo/WAIC) and posterior predictions
real log_lik[N, Trials];
real y_pred[N, Trials];
// y_pred = -1 flags trials beyond Tsubj[i].
// BUG FIX: log_lik must start at 0, not -1 — it is accumulated with +=
// below, so initializing to -1 shifted every per-trial log likelihood
// down by 1 (corrupting loo/WAIC). Unused trials now contribute 0.
for (i in 1:N) {
for (t in 1:Trials) {
log_lik[i, t] = 0;
y_pred[i, t] = -1;
}
}
mu_learnrate = Phi_approx(mu_pr[1]);
mu_discount = Phi_approx(mu_pr[2]);
mu_tau = Phi_approx(mu_pr[3]) * 50;
{ // local block: ev/PE/option_values are not saved with the draws
for (i in 1:N) {
matrix[6,3] ev;
real PE;
vector[2] option_values = [ 0, 0 ]';
for (idx in 1:6) { ev[idx] = initV; }
for (t in 1:(Tsubj[i])) {
// same choice rule as the model block
option_values = [ ev[option1[i, t], 1] * tau[i], 0 ]';
// compute log likelihood of current trial
log_lik[i, t] += categorical_lpmf(choice[i, t] | softmax(option_values));
// generate posterior prediction for current trial
y_pred[i, t] = categorical_rng(softmax(option_values));
// value updates mirror the model block exactly
for (e in 1:3) {
if (e < 3) {
PE = (discount[i]*ev[decision[i, t], (e+1)]) - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
} else {
PE = outcome[i, t] - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
}
}
}
}
}
}
```

RStudio code for initializing and running RStan:

##### initialize fitting with variational Bayes
# Fixes vs. the posted version:
#  - curly "smart quotes" replaced with straight quotes (syntax errors in R)
#  - init element renamed "sigma" -> "sigma_pr" to match the Stan parameter
#    name; rstan silently ignores init entries whose names do not match a
#    parameter, so the scale parameters were never initialized from VB
#  - the final fit reuses the already-compiled model via sampling() instead
#    of re-parsing/re-compiling the .stan file with stan(file = ...)

# Compile once; reused by both vb() and sampling()
Qmodel_arg <- stan_model("~/Desktop/R_projects/Stan/Qlearning.stan")

Qparameters <- list("learnrate" = c(0, 0.5, 1),
                    "discount"  = c(0, 0.5, 1),
                    "tau"       = c(0, 10, 20))

Q_init <- vector("list", 4)

for (i in 1:4) {
  # One ADVI run per chain; its posterior means become that chain's inits
  fitQ_vb <- vb(Qmodel_arg, data = Qlearn_dat, init = 0)
  Qm_vb <- colMeans(as.data.frame(fitQ_vb))

  Qlist <- list(
    mu_pr    = as.vector(Qm_vb[startsWith(names(Qm_vb), "mu_pr")]),
    sigma_pr = as.vector(Qm_vb[startsWith(names(Qm_vb), "sigma_pr")]))

  for (p in names(Qparameters)) {
    Qlist[[paste0(p, "_pr")]] <-
      as.vector(Qm_vb[startsWith(names(Qm_vb), paste0(p, "_pr"))])
  }

  Q_init[[i]] <- Qlist
}

rm(Qlist, fitQ_vb, Qm_vb, Qparameters, p, i)

# Sample with the compiled model object (no recompilation)
fitQ <- sampling(Qmodel_arg, data = Qlearn_dat,
                 iter = 4500, chains = 4, warmup = 2000, init = Q_init,
                 control = list(adapt_delta = 0.95))