Greetings,

I am trying to use hBayesDM to model a Prisoner’s Dilemma task.

This is a simple task with 2 choices (Cooperate or Defect) and 4 outcomes (gaining 0, 1, 2 or 3 points).

I have modeled this task using the VBA Toolbox with complex models using multiple learning rates, tit-for-tat components, etc.

I am trying to run the simplest Rescorla–Wagner (RW) model of this task, with just a single learning rate. For this, I based my model on the 2-armed bandit model, which is the most similar, as it also has only 2 choices (although its outcomes are -1 or 1 rather than 0–3).

I didn’t really change much from the original model, as I am completely new to Stan. I was hoping someone could give some pointers to arrive at my desired model.

Below is the code I have used. My data are just three columns (Subject, Choice, Outcome). Currently this runs and the model fits, but when I try to plot any of the results I get this error:

```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/mnt/d/Downloads/hBayesDM/Python/hbayesdm/diagnostics.py", line 62, in print_fit
dataset_dict = {
File "/mnt/d/Downloads/hBayesDM/Python/hbayesdm/diagnostics.py", line 64, in <dictcomp>
az.from_pystan(model_data.fit, log_likelihood='log_lik')
File "/home/vasco/anaconda3/envs/npai_python37/lib/python3.8/site-packages/arviz/data/io_pystan.py", line 944, in from_pystan
return PyStanConverter(
File "/home/vasco/anaconda3/envs/npai_python37/lib/python3.8/site-packages/arviz/data/io_pystan.py", line 268, in to_inference_data
"posterior": self.posterior_to_xarray(),
File "/home/vasco/anaconda3/envs/npai_python37/lib/python3.8/site-packages/arviz/data/base.py", line 46, in wrapped
return func(cls, *args, **kwargs)
File "/home/vasco/anaconda3/envs/npai_python37/lib/python3.8/site-packages/arviz/data/io_pystan.py", line 86, in posterior_to_xarray
data, data_warmup = get_draws(posterior, ignore=ignore, warmup=self.save_warmup)
File "/home/vasco/anaconda3/envs/npai_python37/lib/python3.8/site-packages/arviz/data/io_pystan.py", line 617, in get_draws
ary[ary_slice] = pyholder.chains[key][-ndraw:]
ValueError: could not broadcast input array from shape (1000) into shape (0)
```

Model:

```
#include /pre/license.stan
data {
int<lower=1> N; // Number of subjects
int<lower=1> T; // Max number of trials across subjects (ragged data are padded out to T)
int<lower=1, upper=T> Tsubj[N]; // Actual number of trials for each subject; entries past Tsubj[i] are padding
int<lower=1, upper=2> choice[N, T]; // Option chosen on each trial: 1 or 2 (here: Cooperate / Defect)
int<lower=0, upper=3> outcome[N, T]; // Points earned on each trial (0-3 in this Prisoner's Dilemma task)
}
transformed data {
  // Initial expected value for both options, before any learning.
  // Declared and defined in a single statement. The original
  // <lower=0, upper=3> bounds were dropped: initV is a constant zero
  // vector, so the bounds check could never fail and only suggested a
  // constraint that does not exist.
  vector[2] initV = rep_vector(0.0, 2);
}
parameters {
// Declare all parameters as vectors for vectorizing
// Hyper(group)-parameters: mu_pr[1]/sigma[1] govern A, mu_pr[2]/sigma[2] govern tau,
// both on an unconstrained (probit) scale; they are mapped to natural scales
// in transformed parameters.
vector[2] mu_pr;
vector<lower=0>[2] sigma;
// Subject-level raw parameters (for Matt trick), i.e. non-centered
// parameterization: these are standard-normal here and are shifted/scaled by
// the group-level parameters below, which improves sampling geometry.
vector[N] A_pr; // learning rate (raw, unconstrained scale)
vector[N] tau_pr; // inverse temperature (raw, unconstrained scale)
}
transformed parameters {
  // Subject-level parameters on their natural scales.
  // Phi_approx maps the non-centered normal draws into (0, 1); tau is then
  // rescaled to (0, 5). Written in vectorized form over subjects rather than
  // an element-wise loop — the math is identical.
  vector<lower=0, upper=1>[N] A;   // learning rate
  vector<lower=0, upper=5>[N] tau; // inverse temperature

  A = Phi_approx(mu_pr[1] + sigma[1] * A_pr);
  tau = Phi_approx(mu_pr[2] + sigma[2] * tau_pr) * 5;
}
model {
  // Group-level (hyper) priors
  mu_pr ~ normal(0, 1);
  sigma ~ normal(0, 0.2);

  // Subject-level raw-parameter priors (standard normal; non-centered)
  A_pr ~ normal(0, 1);
  tau_pr ~ normal(0, 1);

  // Likelihood: softmax choice rule with Rescorla-Wagner value updating
  for (i in 1:N) {
    vector[2] ev = initV; // expected value of each option, reset per subject

    for (t in 1:Tsubj[i]) {
      // Choice probabilities via softmax; larger tau -> more deterministic
      choice[i, t] ~ categorical_logit(tau[i] * ev);

      // Delta-rule update of the chosen option only
      // (prediction error inlined: outcome - current expected value)
      ev[choice[i, t]] += A[i] * (outcome[i, t] - ev[choice[i, t]]);
    }
  }
}
generated quantities {
  // Group-level means mapped to the natural parameter scales
  real<lower=0, upper=1> mu_A;
  real<lower=0, upper=5> mu_tau;

  // Per-subject log likelihood (summed over that subject's trials),
  // used for LOO/WAIC model comparison
  real log_lik[N];

  // Posterior predictive choices; padded cells keep the sentinel -1
  real y_pred[N, T];

  // Fill the whole array with the sentinel up front (avoids NULL values)
  y_pred = rep_array(-1.0, N, T);

  mu_A = Phi_approx(mu_pr[1]);
  mu_tau = Phi_approx(mu_pr[2]) * 5;

  { // local block: ev is recomputed here rather than stored per draw
    for (i in 1:N) {
      vector[2] ev = initV; // expected values, reset per subject

      log_lik[i] = 0;
      for (t in 1:Tsubj[i]) {
        // log likelihood of the observed choice under the softmax rule
        log_lik[i] += categorical_logit_lpmf(choice[i, t] | tau[i] * ev);

        // simulate a choice from the same distribution (posterior predictive)
        y_pred[i, t] = categorical_rng(softmax(tau[i] * ev));

        // delta-rule update of the chosen option (prediction error inlined)
        ev[choice[i, t]] += A[i] * (outcome[i, t] - ev[choice[i, t]]);
      }
    }
  }
}
```

Any help is much appreciated! Thank you!