Hello,
I’ve been trying to fit an RL model similar to the one in this paper (Dynamic Interaction between Reinforcement Learning and Attention in Multidimensional Environments - ScienceDirect); the model is described in the paper’s “choice models” section. When I run it, I get the error `Error in sampler$call_sampler(c(args, dotlist)) : Initialization failed.` and I am not sure how to fix it.
In my model, there are 3 image columns with 3 features each (9 features in total). Each feature has its own attention parameter, which I am trying to fit. In addition, I want to fit a beta parameter (the inverse temperature of the softmax function) and a learning rate. For more information, please refer to the paper linked above. I’ve also tried adding print statements to debug the model, but for some reason nothing is printed.
Thank you very much!
data {
  // Task dimensions.
  int<lower = 0> num_cols;      // number of image columns
  int<lower = 0> num_features;  // total number of features
  int<lower = 0> num_trials;    // number of trials

  // Index of which arm (column) was pulled on each trial. It is used to index
  // the per-column value vector, so it is bounded above by num_cols.
  int<lower = 1, upper = num_cols> col_choice[num_trials];

  // Feature indices shown in each column on each trial. Every one of these is
  // used to index the per-feature value vector, so each is bounded above by
  // num_features; an out-of-range index is then rejected when the data are
  // read instead of surfacing as an indexing error during sampling.
  int<lower = 1, upper = num_features> col_1_top_feature[num_trials];     // top feature of left column
  int<lower = 1, upper = num_features> col_1_mid_feature[num_trials];     // middle feature of left column
  int<lower = 1, upper = num_features> col_1_bottom_feature[num_trials];  // bottom feature of left column
  int<lower = 1, upper = num_features> col_2_top_feature[num_trials];     // top feature of middle column
  int<lower = 1, upper = num_features> col_2_mid_feature[num_trials];     // middle feature of middle column
  int<lower = 1, upper = num_features> col_2_bottom_feature[num_trials];  // bottom feature of middle column
  int<lower = 1, upper = num_features> col_3_top_feature[num_trials];     // top feature of right column
  int<lower = 1, upper = num_features> col_3_mid_feature[num_trials];     // middle feature of right column
  int<lower = 1, upper = num_features> col_3_bottom_feature[num_trials];  // bottom feature of right column

  // Feature indices of the column that was actually chosen on each trial.
  int<lower = 1, upper = num_features> col_chosen_top_feature[num_trials];     // top feature of chosen column
  int<lower = 1, upper = num_features> col_chosen_mid_feature[num_trials];     // middle feature of chosen column
  int<lower = 1, upper = num_features> col_chosen_bottom_feature[num_trials];  // bottom feature of chosen column

  // Outcome of the bandit arm pull on each trial.
  int<lower = 0> result[num_trials];
}
parameters {
  // Learning rate, restricted to the unit interval.
  real<lower=0, upper=1> alpha;

  // Inverse temperature of the softmax choice rule; non-negative.
  real<lower=0> beta;

  // One attention weight per feature, each restricted to [0, 1].
  vector<lower=0, upper=1>[num_features] attention_params;
}
transformed parameters {
  // V[t] holds the value of each feature after the update made on trial t.
  // col_values[t] holds the value of each column as seen on trial t.
  //
  // These are deliberately declared WITHOUT <lower=0, upper=1> bounds.
  // A column value is the sum of three feature values, so it can exceed 1;
  // once it does, the prediction error can be negative and push a feature
  // value below 0. Stan rejects any draw whose transformed parameters violate
  // their declared bounds, so bounding these quantities makes every
  // initialization attempt fail ("Initialization failed").
  vector[num_features] V[num_trials];
  vector[num_cols] col_values[num_trials];
  real delta[num_trials]; // prediction error on each trial

  for (trial in 1:num_trials) {
    if (trial == 1) {
      // First trial: every feature value and column value starts at 0.
      col_values[1] = rep_vector(0, num_cols);
      V[1] = rep_vector(0, num_features);
    } else {
      // Carry the feature values over from the end of the previous trial.
      V[trial] = V[trial - 1];

      // A column's value is the sum of the values of its three features.
      col_values[trial, 1] = V[trial, col_1_top_feature[trial]]
                             + V[trial, col_1_mid_feature[trial]]
                             + V[trial, col_1_bottom_feature[trial]];
      col_values[trial, 2] = V[trial, col_2_top_feature[trial]]
                             + V[trial, col_2_mid_feature[trial]]
                             + V[trial, col_2_bottom_feature[trial]];
      col_values[trial, 3] = V[trial, col_3_top_feature[trial]]
                             + V[trial, col_3_mid_feature[trial]]
                             + V[trial, col_3_bottom_feature[trial]];
    }

    // Prediction error: observed outcome minus the value of the chosen column.
    delta[trial] = result[trial] - col_values[trial, col_choice[trial]];

    // Update only the three features of the chosen column; each step is scaled
    // by the learning rate and by that feature's attention weight.
    V[trial, col_chosen_top_feature[trial]]
      += alpha * delta[trial] * attention_params[col_chosen_top_feature[trial]];
    V[trial, col_chosen_mid_feature[trial]]
      += alpha * delta[trial] * attention_params[col_chosen_mid_feature[trial]];
    V[trial, col_chosen_bottom_feature[trial]]
      += alpha * delta[trial] * attention_params[col_chosen_bottom_feature[trial]];
  }
}
model {
  // Priors. alpha and attention_params are declared with support [0, 1], so
  // their gamma(2, 3) priors are effectively truncated to that interval.
  beta ~ gamma(2, 3);
  alpha ~ gamma(2, 3);

  // Vectorized over all features: the same prior for every attention weight,
  // without assuming that num_features is exactly 9.
  attention_params ~ gamma(2, 3);

  // Likelihood: log-probability of the choice actually made on each trial
  // under a softmax over the column values scaled by the inverse temperature.
  // categorical_logit_lpmf(k | x) is log_softmax(x)[k].
  for (trial in 1:num_trials) {
    target += categorical_logit_lpmf(col_choice[trial] | beta * col_values[trial]);
  }
}