Hello all, this is my first post and I would greatly appreciate any advice on what might be a simple problem. I apologize in advance for the long-windedness, but:
I am working on optimizing the code for a Stan model I’ve written for a hierarchical reinforcement learning (Q-learning) model – I fit the model using RStan. For a few weeks now, I have been trying to understand the correlation between the posterior marginal distributions of the population-level softmax choice temperature parameter (“tau” in the below Stan code – the upper bound in the code is set to 50, though in the pairs plot the upper bound was set to 20; I recurrently find some correlation of this general shape no matter the upper bound on tau…) and a temporal-discount parameter (“discount”). Also, in the version of this model that generated the pairs plot here, I had coded the second element of the “option_values” vector as the expected value of the second option (“option2”). Qlearn_pairs.pdf (969.1 KB)
From what I’ve found in the Stan User’s manual, a trick to get around the problematic/correlated posterior distributions is to fix one of the elements of the vector “option_values”, whose elements are mapped to a K=2 simplex via the softmax function, as equal to zero (as in the below code). Could someone help me understand if setting this second element equal to zero is the proper form, or rather if the proper form was what I had initially, with “option_values” containing the learned expected values for both options on a trial?
(Some additional information that may or may not be relevant): In the past, I have had no problem coding a version of this model that results in no divergences, medium-range values (<10k) for n_eff for the amount of iterations I specify (2000 warmup, 8000 iterations for 24k total posterior samples), sufficient chain mixing/Rhat values (all close to 1 or < 1.05) for all estimated parameters, and the model finishes in around a few hours (see RStudio code below for specifics).
Any and all comments/suggestions on the below code would be greatly appreciated, and please do let me know if additional information is required from my end.
data {
int<lower=1> N;
int Trials;
int Tsubj[N];
int<lower=1, upper=6> option1[N, Trials];
int<lower=1, upper=6> option2[N, Trials];
int<lower=-1, upper=2> choice[N, Trials];
int<lower=1, upper=6> decision[N, Trials];
real outcome[N, Trials];
}
transformed data {
row_vector[3] initV;
initV = rep_row_vector(0.0, 3);
}
parameters {
// Declare all parameters as vectors for vectorizing
// Hyper(group)-parameters
vector[3] mu_pr;
vector<lower=0>[3] sigma_pr;
// Subject-level raw parameters
vector[N] learnrate_pr;
vector[N] discount_pr;
vector[N] tau_pr;
}
transformed parameters {
// subject-level parameters
vector<lower=0, upper=1>[N] learnrate;
vector<lower=0, upper=1>[N] discount;
vector<lower=0, upper=50>[N] tau;
for (i in 1:N) {
learnrate[i] = Phi_approx(mu_pr[1] + sigma_pr[1] * learnrate_pr[i]);
discount[i] = Phi_approx(mu_pr[2] + sigma_pr[2] * discount_pr[i]);
tau[i] = Phi_approx(mu_pr[3] + sigma_pr[3] * tau_pr[i]) * 50;
}
}
model {
// Hyperppriors defining mean and standard deviation of normally-distributed {learnrate, tau, gamma} parameters
mu_pr ~ normal(0,1);
sigma_pr ~ normal(0,0.2);
// individual parameters
learnrate_pr ~ normal(0,1);
discount_pr ~ normal(0,1);
tau_pr ~ normal(0,1);
// subject loop and trial loop
for (i in 1:N) {
matrix[6,3] ev;
real PE;
vector[2] option_values = [ 0, 0 ]';
for (idx in 1:6) { ev[idx] = initV; }
for (t in 1:(Tsubj[i])) {
option_values = [ ev[option1[i, t], 1] * tau[i], 0 ]';
// compute action probabilities
target += categorical_lpmf(choice[i, t] | softmax(option_values));
for (e in 1:3) {
if (e < 3) {
PE = (discount[i]*ev[decision[i, t], (e+1)]) - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
} else {
PE = outcome[i, t] - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
}
}
}
}
}
generated quantities {
// For group level parameters
real<lower=0, upper=1> mu_learnrate;
real<lower=0, upper=1> mu_discount;
real<lower=0, upper=50> mu_tau;
real log_lik[N, Trials];
real y_pred[N, Trials];
for (i in 1:N) {
for (t in 1:Trials) {
log_lik[i, t] = -1;
y_pred[i, t] = -1;
}
}
mu_learnrate = Phi_approx(mu_pr[1]);
mu_discount = Phi_approx(mu_pr[2]);
mu_tau = Phi_approx(mu_pr[3]) * 50;
{ // local section, this saves time and space
for (i in 1:N) {
matrix[6,3] ev;
real PE;
vector[2] option_values = [ 0, 0 ]';
for (idx in 1:6) { ev[idx] = initV; }
for (t in 1:(Tsubj[i])) {
option_values = [ ev[option1[i, t], 1] * tau[i], 0 ]';
// compute log likelihood of current trial
log_lik[i, t] += categorical_lpmf(choice[i, t] | softmax(option_values));
// generate posterior prediction for current trial
y_pred[i, t] = categorical_rng(softmax(option_values));
for (e in 1:3) {
if (e < 3) {
PE = (discount[i]*ev[decision[i, t], (e+1)]) - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
} else {
PE = outcome[i, t] - ev[decision[i, t], e];
ev[decision[i, t], e] += learnrate[i]*PE;
}
}
}
}
}
}
RStudio code for initializing and running RStan:
initialize fitting with variational Bayes
Qmodel_arg <- stan_model("~/Desktop/R_projects/Stan/Qlearning.stan")
Qparameters <- list(“learnrate” = c(0,0.5,1),
“discount” = c(0,0.5,1),
“tau” = c(0,10,20))
Q_init <- NULL
for (i in 1:4) {
fitQ_vb <- vb(Qmodel_arg, data = Qlearn_dat, init = 0)
Qm_vb <- colMeans(as.data.frame(fitQ_vb))
Qlist <- list(
mu_pr = as.vector(Qm_vb[startsWith(names(Qm_vb), “mu_pr”)]),
sigma = as.vector(Qm_vb[startsWith(names(Qm_vb), “sigma”)]))
for (p in names(Qparameters)) {
Qlist[[paste0(p, “_pr”)]] <-
as.vector(Qm_vb[startsWith(names(Qm_vb), paste0(p, “_pr”))])
}
Q_init[[i]] <- Qlist}
rm(Qlist, fitQ_vb, Qm_vb, Qmodel_arg, Qparameters, p, i)
fitQ <- stan(file = “~/Desktop/R_projects/Stan/Qlearning.stan”, data = Qlearn_dat,
iter = 4500, chains = 4, warmup = 2000, init = Q_init, control=list(adapt_delta=0.95))