Dear Stan community!
I’m fairly new to Baysian modelling in general and Stan language in particular. So it might be that I’m missing some very obvious issues. However, I’ve been trying quite a lot but got stuck at one point and looking for any kind of hints or ideas!
I ran a within-subject repeated measures experiment with 3 different drug conditions (Placebo+2different neuromodulators) on a rather simple bandit 2 arm reward learning paradigm.
I have a hierarchical reward learning model that is based on similar models from hBayesDM package.
I can run the model on each of the drug conditions separately using using the Single condition model below. This works perfectly well, I get reasonable parameter estimates for the model parameters and also a good parameter recovery for input data.
However, I’d like to set up a model that includes all 3 conditions but I cannot get my head around how to do this. I created a Multiple conditions model (see below) but that seems to be far from what I need. I get divergent transitions for all iterations, chains do not mix at all. I suspect this to be related to the way I set up the parameterization for the multiple conditions. Basically my approach is to have vectors of parameters and then do the very same that I do in the basic model, too. I could imagine that it makes more sense to introduce another level of hierarchy in some other way but I don’t really know how to implement that.
An comments, ideas, or suggestions are highly appreciated!
Thanks!
Best, Simon
Single condition model
data {
int<lower=1> N; // number of subjects
int<lower=1> T; // max number of trials
int<lower=1, upper=T> Tsubj[N]; // number of trials per subject
int<lower=-1, upper=2> choice[N, T]; // choice in given trial
real outcome[N, T]; // outcome in given trial
real R_scale; // scale parameter of cauchy distribution of R parameter
real P_scale; // scale parameter of cauchy distribution of P parameter
}
transformed data {
vector[2] initV; // initial values for EV
initV = rep_vector(0.0, 2);
}
parameters {
// Declare all parameters as vectors for vectorizing
// Hyper(group)-parameters
vector[5] mu_pr;
vector<lower=0>[5] sigma;
// Subject-level raw parameters (for Matt trick)
vector[N] Arew_pr; // reward learning rate
vector[N] Apun_pr; // punishment learning rate
vector[N] tau_pr; // inverse temperature
vector[N] R_pr; // reward sensitivity
vector[N] P_pr; // punishment sensitivity
}
transformed parameters {
// subject-level parameters
vector<lower=0, upper=1>[N] Arew;
vector<lower=0, upper=1>[N] Apun;
vector<lower=0, upper=5>[N] tau;
vector[N] R; // you may not need the bounding above. It could be causing divergences
vector[N] P;
Arew = Phi_approx(mu_pr[1] + sigma[1] * Arew_pr);
Apun = Phi_approx(mu_pr[2] + sigma[2] * Apun_pr);
tau = Phi_approx(mu_pr[3] + sigma[3] * tau_pr) * 5;
R = mu_pr[4] + sigma[4] * R_pr; // the multiplier is only necessary when using the Phi_approx function
P = mu_pr[5] + sigma[5] * P_pr; // which tansforms the parameter to be in 0-1
}
model {
// Hyperparameters
mu_pr ~ normal(0, 1);
sigma[1:3] ~ normal(0, 0.2);
// sigma[4:5] ~ cauchy(0, 1);
sigma[4] ~ cauchy(0, R_scale);
sigma[5] ~ cauchy(0, P_scale);
// individual parameters
Arew_pr ~ normal(0, 1.0);
Apun_pr ~ normal(0, 1.0);
tau_pr ~ normal(0, 1.0);
R_pr ~ normal(0, 1.0);
P_pr ~ normal(0, 1.0);
// subject loop and trial loop
for (i in 1:N) {
vector[2] ev; // expected value
real PE; // prediction error
real alpha; // effective learning rate (Arew or Apun, respectively)
real sensitivity;// effective sensitivity (R or P, respectively)
ev = initV;
for (t in 1:(Tsubj[i])) {
// compute action probabilities
choice[i, t] ~ categorical_logit(tau[i] * ev);
// prediction error
sensitivity = (outcome[i, t] == 1 ? R[i] : P[i]);
PE = (outcome[i, t] * sensitivity) - ev[choice[i, t]];
// value updating (learning)
alpha = (PE >= 0) ? Arew[i] : Apun[i];
ev[choice[i, t]] += alpha * PE;
}
}
}
generated quantities {
// For group level parameters
real<lower=0, upper=1> mu_Arew;
real<lower=0, upper=1> mu_Apun;
real<lower=0, upper=5> mu_tau; // I made the upper bound here 5, given it was 1 before
real mu_R;
real mu_P;
// For log likelihood calculation
real log_lik[N];
// For posterior predictive check
real y_pred[N, T];
real PE_pred[N, T];
real ev_pred[N, T, 2];
// Set all posterior predictions to 0 (avoids NULL values)
for (i in 1:N) {
for (t in 1:T) {
y_pred[i, t] = -1;
PE_pred[i, t] = -1;
ev_pred[i, t, 1] = -1;
ev_pred[i, t, 2] = -1;
}
}
mu_Arew = Phi_approx(mu_pr[1]);
mu_Apun = Phi_approx(mu_pr[2]);
mu_tau = Phi_approx(mu_pr[3]) * 5;
mu_R = mu_pr[4];
mu_P = mu_pr[5];
{ // local section, this saves time and space
for (i in 1:N) {
vector[2] ev; // expected value
real PE; // prediction error
int co;
real alpha;
real sensitivity;
// Initialize values
ev = initV;
ev_pred[i, 1, 1] = 0;
ev_pred[i, 1, 2] = 0;
log_lik[i] = 0;
for (t in 1:(Tsubj[i])) {
// get choice of current trial (either 1 or 2)
co = choice[i, t];
// compute log likelihood of current trial
log_lik[i] += categorical_logit_lpmf(choice[i, t] | tau[i] * ev);
// generate posterior prediction for current trial
y_pred[i, t] = categorical_rng(softmax(tau[i] * ev));
// prediction error
sensitivity = (outcome[i, t] == 1 ? R[i] : P[i]);
PE = (outcome[i, t] * sensitivity) - ev[choice[i, t]];
PE_pred[i, t] = PE;
// value updating (learning)
alpha = (PE >= 0) ? Arew[i] : Apun[i];
ev[co] += alpha * PE;
if (t > 1) {
ev_pred[i, t] = ev_pred[i, t-1];
}
ev_pred[i, t, co] += alpha * PE;
}
}
}
}
Multiple conditions model
data {
int<lower=1> N; // number of subjects
int<lower=1> Max_tr; // max number of trials
int<lower=1> N_cond; // number of conditions
int<lower=0, upper=Max_tr> Tsubj[N, N_cond]; // subjects x n_trial
int<lower=-1, upper=2> choice[N, N_cond, Max_tr]; // subjects x conditions x choices
real outcome[N, N_cond, Max_tr]; // subjects x conditions x outcomes
real R_scale; // scale parameter of cauchy distribution used to scale sigma (range) of R parameter
real P_scale; // scale parameter of cauchy distribution used to scale sigma (range) of P parameter
}
transformed data {
vector[2] initV; // initial values for EV
initV = rep_vector(0.0, 2);
}
parameters {
// Declare all parameters as vectors for vectorizing
// Hyper(group)-parameters
// Hyperparameter means
real<lower=0, upper=1> mu_Arew[N_cond];
real<lower=0, upper=1> mu_Apun[N_cond];
real mu_R[N_cond];
real mu_P[N_cond];
real<lower=0, upper=5> mu_tau[N_cond];
// Hyperparameter sigmas
real sigma_Arew[N_cond];
real sigma_Apun[N_cond];
real sigma_R[N_cond];
real sigma_P[N_cond];
real sigma_tau[N_cond];
// Subject-level raw parameters (for Matt trick)
vector [N] Arew_pr[N_cond]; // reward learning rate
vector [N] Apun_pr[N_cond]; // punishment learning rate
vector [N] tau_pr[N_cond]; // inverse temperature
vector [N] R_pr[N_cond]; // reward sensitivity
vector [N] P_pr[N_cond]; // punishment sensitivity
}
transformed parameters {
// subject-level parameters
vector<lower=0, upper=1>[N] Arew[N_cond];
vector<lower=0, upper=1>[N] Apun[N_cond];
vector<lower=0, upper=5>[N] tau[N_cond];
vector[N] R[N_cond]; // you may not need the bounding above. It could be causing divergences
vector[N] P[N_cond];
for (j in 1:N_cond) {
Arew[j] = mu_Arew[j] + sigma_Arew[j] * Arew_pr[j];
Apun[j] = mu_Apun[j] + sigma_Apun[j] * Apun_pr[j];
R[j] = mu_R[j] + sigma_R[j] * R_pr[j];
P[j] = mu_P[j] + sigma_P[j] * P_pr[j];
tau[j] = mu_tau[j] + sigma_tau[j] * tau_pr[j];
}
}
model {
for (j in 1:N_cond) {
// Hyperparameters means
mu_Arew[j] ~ normal(0, 1);
mu_Apun[j] ~ normal(0, 1);
mu_R[j] ~ normal(0, 1);
mu_P[j] ~ normal(0, 1);
mu_tau[j] ~ normal(0, 1);
// Hyperparameters sigmas
sigma_Arew[j] ~ normal(0, 0.2);
sigma_Apun[j] ~ normal(0, 0.2);
sigma_R[j] ~ cauchy(0, R_scale);
sigma_P[j] ~ cauchy(0, P_scale);
sigma_tau[j] ~ normal(0, 0.2);
}
for (j in 1:N_cond) {
// individual parameters
Arew_pr[j] ~ normal(0, 1.0);
Apun_pr[j] ~ normal(0, 1.0);
tau_pr[j] ~ normal(0, 1.0);
R_pr[j] ~ normal(0, 1.0);
P_pr[j] ~ normal(0, 1.0);
}
// subject loop and trial loop
for (i in 1:N) {
int n_trials;
// condition loop
for (j in 1:N_cond) {
vector[2] ev; // expected value
real PE; // prediction error
real alpha; // effective learning rate (Arew or Apun, respectively)
real sensitivity;// effective sensitivity (R or P, respectively)
ev = initV;
n_trials = Tsubj[i, j];
if (n_trials <= 0) continue;
// trial loop
for (t in 1:n_trials) {
// compute action probabilities
choice[i, j, t] ~ categorical_logit(tau[j, i] * ev);
// prediction error
sensitivity = (outcome[i, j, t] == 1 ? R[j, i] : P[j, i]);
PE = (outcome[i, j, t] * sensitivity) - ev[choice[i, j, t]];
// value updating (learning)
alpha = (PE >= 0) ? Arew[j, i] : Apun[j, i];
ev[choice[i, j, t]] += alpha * PE;
}
}
}
}
generated quantities {
// For group level parameters
//real<lower=0, upper=1> mu_Arew;
//real<lower=0, upper=1> mu_Apun;
//real<lower=0, upper=5> mu_tau; // I made the upper bound here 5, given it was 1 before
//real mu_R;
//real mu_P;
// For log likelihood calculation
real log_lik[N, N_cond];
// For posterior predictive check
real y_pred[N, N_cond, Max_tr];
real PE_pred[N, N_cond, Max_tr];
real ev_pred[N, N_cond, Max_tr, 2];
// Set all posterior predictions to 0 (avoids NULL values)
for (i in 1:N) {
for (j in 1:N_cond){
log_lik[i, j] = -1;
for (t in 1:Max_tr) {
y_pred[i, j, t] = -1;
PE_pred[i, j, t] = -1;
ev_pred[i, j, t, 1] = -1;
ev_pred[i, j, t, 2] = -1;
}
}
}
{ // local section, this saves time and space
// subject loop
for (i in 1:N) {
// condition loop
for (j in 1:N_cond) {
vector[2] ev; // expected value
real PE; // prediction error
int co;
real alpha;
real sensitivity;
int n_trials;
n_trials = Tsubj[i, j];
if (n_trials <= 0) continue;
// Initialize values
ev = initV;
ev_pred[i, j, 1, 1] = 0;
ev_pred[i, j, 1, 2] = 0;
log_lik[i, j] = 0;
// trial loop
for (t in 1:n_trials) {
// get choice of current trial (either 1 or 2)
co = choice[i, j, t];
// compute log likelihood of current trial
log_lik[i, j] += categorical_logit_lpmf(choice[i, j, t] | tau[j, i] * ev);
// generate posterior prediction for current trial
y_pred[i, j, t] = categorical_rng(softmax(tau[j, i] * ev));
// prediction error
sensitivity = (outcome[i, j, t] == 1 ? R[j, i] : P[j, i]);
PE = (outcome[i, j, t] * sensitivity) - ev[choice[i, j, t]];
PE_pred[i, j, t] = PE;
// value updating (learning)
alpha = (PE >= 0) ? Arew[j, i] : Apun[j, i];
ev[co] += alpha * PE;
if (t > 1) {
ev_pred[i, j, t] = ev_pred[i, j, t-1];
}
ev_pred[i, j, t, co] += alpha * PE;
}
}
}
}
}