Hello everyone,
I have a hierarchical RL model based on the Rescorla-Wagner (R-W) delta rule. Out of all the models I have tested, LOOIC identifies this one as the best fit to my data: two learning rates (one for reward and one for punishment) plus one sensitivity parameter.
All of the diagnostics for this model look good. However, under one of the conditions the reward learning rate is very close to zero while the sensitivity stays high, which seems strange to me.
Since this is my first time writing RL models, I was wondering whether there are any obvious errors in the model.
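For reference, the per-trial computations the code below is meant to implement are:

$$P(a_t = k) = \mathrm{softmax}(Q_t)_k, \qquad \delta_t = s \, o_t - Q_t(a_t), \qquad Q_{t+1}(a_t) = Q_t(a_t) + \alpha \, \delta_t,$$

with $\alpha = \alpha_{\text{pun}}$ when the outcome $o_t = -1$ and $\alpha = \alpha_{\text{rew}}$ otherwise (outcomes $o_t \in \{-1, 0, 1\}$, sensitivity $s$, expected values $Q$ initialized at 0). Here is the full model: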
data {
  int<lower=1> N_subjects;
  int<lower=1> N_trials;
  int<lower=1> Maxtrial;      // max number of trials across subjects
  int<lower=1> N_conditions;
  int<lower=1, upper=N_trials> Trialxsubj[N_conditions, N_subjects]; // number of trials per subject
  int<lower=1, upper=2> choice[N_conditions, N_subjects, Maxtrial];
  int<lower=-1, upper=1> outcome[N_conditions, N_subjects, Maxtrial];
}
transformed data {
  vector[2] initV;            // initial values
  initV = rep_vector(0.0, 2);
}
parameters {
  // Hyper-parameters
  real mu_lr_r[N_conditions];              // mean for learning rate (reward)
  real mu_lr_p[N_conditions];              // mean for learning rate (punishment)
  real mu_sen[N_conditions];               // mean for sensitivity parameter
  real<lower=0> sigma_lr_r[N_conditions];  // sd for learning rate (reward)
  real<lower=0> sigma_lr_p[N_conditions];  // sd for learning rate (punishment)
  real<lower=0> sigma_sen[N_conditions];   // sd for sensitivity parameter
  // Subject-level raw parameters
  real lr_rew_pr[N_conditions, N_subjects];      // raw learning rate (reward)
  real lr_pun_pr[N_conditions, N_subjects];      // raw learning rate (punishment)
  real sensitivity_pr[N_conditions, N_subjects]; // raw sensitivity
}
transformed parameters {
  // Transforming parameters to be in [0, 1]
  real<lower=0, upper=1> lr_rew[N_conditions, N_subjects];
  real<lower=0, upper=1> lr_pun[N_conditions, N_subjects];
  real<lower=0, upper=1> sensitivity[N_conditions, N_subjects];
  for (c in 1:N_conditions) {
    for (i in 1:N_subjects) {
      lr_rew[c, i]      = Phi_approx(mu_lr_r[c] + sigma_lr_r[c] * lr_rew_pr[c, i]);
      lr_pun[c, i]      = Phi_approx(mu_lr_p[c] + sigma_lr_p[c] * lr_pun_pr[c, i]);
      sensitivity[c, i] = Phi_approx(mu_sen[c] + sigma_sen[c] * sensitivity_pr[c, i]);
    }
  }
}
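// Note: the block above is a non-centered parameterization (group mean +
// group sd * standard-normal raw parameter), with Phi_approx mapping the
// result to (0, 1) for all three subject-level parameters.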
model {
  // Hyperparameter priors
  mu_lr_r ~ normal(0, 1);
  sigma_lr_r ~ normal(0, 0.2);
  mu_lr_p ~ normal(0, 1);
  sigma_lr_p ~ normal(0, 0.2);
  mu_sen ~ normal(0, 1);
  sigma_sen ~ normal(0, 0.2);
  // Subject-level priors (raw parameters)
  for (c in 1:N_conditions) {
    for (i in 1:N_subjects) {
      lr_rew_pr[c, i] ~ normal(0, 1);
      lr_pun_pr[c, i] ~ normal(0, 1);
      sensitivity_pr[c, i] ~ normal(0, 1);
    }
  }
  // Likelihood
  for (c in 1:N_conditions) {
    for (i in 1:N_subjects) {
      vector[2] ev = initV; // expected values
      real PE;              // prediction error
      for (t in 1:Trialxsubj[c, i]) {
        if (outcome[c, i, t] == -1) { // punished trial
          // action probabilities
          choice[c, i, t] ~ categorical_logit(ev);
          // prediction error
          PE = sensitivity[c, i] * outcome[c, i, t] - ev[choice[c, i, t]];
          // value updating
          ev[choice[c, i, t]] += lr_pun[c, i] * PE;
        } else { // rewarded and non-rewarded trials
          // action probabilities
          choice[c, i, t] ~ categorical_logit(ev);
          // prediction error
          PE = sensitivity[c, i] * outcome[c, i, t] - ev[choice[c, i, t]];
          // value updating
          ev[choice[c, i, t]] += lr_rew[c, i] * PE;
        }
      }
    }
  }
}
generated quantities {
  // For log likelihood calculation
  real log_lik[N_conditions, N_subjects];
  {
    for (c in 1:N_conditions) {
      for (i in 1:N_subjects) {
        vector[2] ev = initV; // expected values
        real PE;              // prediction error
        log_lik[c, i] = 0;
        for (t in 1:Trialxsubj[c, i]) {
          if (outcome[c, i, t] == -1) { // punished trial
            // log likelihood of the current trial
            log_lik[c, i] += categorical_logit_lpmf(choice[c, i, t] | ev);
            // prediction error
            PE = sensitivity[c, i] * outcome[c, i, t] - ev[choice[c, i, t]];
            // value updating
            ev[choice[c, i, t]] += lr_pun[c, i] * PE;
          } else { // rewarded and non-rewarded trials
            // log likelihood of the current trial
            log_lik[c, i] += categorical_logit_lpmf(choice[c, i, t] | ev);
            // prediction error
            PE = sensitivity[c, i] * outcome[c, i, t] - ev[choice[c, i, t]];
            // value updating (learning)
            ev[choice[c, i, t]] += lr_rew[c, i] * PE;
          }
        }
      }
    }
  }
}
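In case it matters, the LOOIC comparison uses the pointwise log_lik from generated quantities (one entry per condition x subject). This is roughly the pipeline, sketched with CmdStanPy/ArviZ; the Stan file name, the data file, and the second candidate model are placeholders:

import arviz as az
from cmdstanpy import CmdStanModel

# Compile and fit the model above ("rw_two_lr.stan" is a placeholder name;
# "rw_data.json" is a JSON file with the fields in the data block)
model = CmdStanModel(stan_file="rw_two_lr.stan")
fit = model.sample(data="rw_data.json", chains=4)

# Pull the pointwise log_lik into ArviZ
idata = az.from_cmdstanpy(posterior=fit, log_likelihood="log_lik")

# PSIS-LOO; LOOIC = -2 * elpd_loo
print(az.loo(idata))

# Comparing against another fitted candidate works the same way, e.g.
# az.compare({"two_lr": idata, "one_lr": idata_one_lr})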