Dear Stan experts, I am a psychological student interested in cognitive model and I noticed people use RL model in Stan with different ways. Some people( rlssm package author), they built a vector that store the model’s prediciton data in transformed block, and there are another bunch of people, for example, the author of hbayesdm package, they put the model’s formula in model block. What’s the difference between the two way? Does one way is faster than the other way? Thanks.
Here is the two template I copied from rlssm package and hbayesdm package.
rlssm way
data {
int<lower=1> N; // number of data items
int<lower=1> K; // number of options
int<lower=1> trial_block[N]; // trial within block
vector[N] f_cor; // feedback correct option
vector[N] f_inc; // feedback incorrect option
int<lower=1, upper=K> cor_option[N]; // correct option
int<lower=1, upper=K> inc_option[N]; // incorrect option
int<lower=1> block_label[N]; // block label
int<lower=-1,upper=1> accuracy[N]; // accuracy (0, 1)
real initial_value; // intial value for learning in the first block
vector[2] alpha_priors; // mean and sd of the alpha prior
vector[2] sensitivity_priors; // mean and sd of the sensitivity prior
}
transformed data {
vector[K] Q0;
Q0 = rep_vector(initial_value, K);
}
parameters {
real alpha;
real sensitivity;
}
transformed parameters {
real log_p_t[N]; // trial-by-trial probability
vector[K] Q; // Q state values
real PE_cor;
real PE_inc;
real Q_mean;
real transf_alpha;
real transf_sensitivity;
transf_alpha = Phi(alpha);
transf_sensitivity = log(1 + exp(sensitivity));
for (n in 1:N) {
if (trial_block[n] == 1) {
if (block_label[n] == 1) {
Q = Q0;
} else {
Q_mean = mean(Q);
Q = rep_vector(Q_mean, K);
}
}
PE_cor = f_cor[n] - Q[cor_option[n]];
PE_inc = f_inc[n] - Q[inc_option[n]];
log_p_t[n] = transf_sensitivity*Q[cor_option[n]] - log(exp(transf_sensitivity*Q[cor_option[n]]) + exp(transf_sensitivity*Q[inc_option[n]]));
Q[cor_option[n]] = Q[cor_option[n]] + transf_alpha*PE_cor;
Q[inc_option[n]] = Q[inc_option[n]] + transf_alpha*PE_inc;
}
}
model {
alpha ~ normal(alpha_priors[1], alpha_priors[2]);
sensitivity ~ normal(sensitivity_priors[1], sensitivity_priors[2]);
accuracy ~ bernoulli(exp(log_p_t));
}
generated quantities {
vector[N] log_lik;
{for (n in 1:N) {
log_lik[n] = bernoulli_lpmf(accuracy[n] | exp(log_p_t[n]));
}
}
}
****
hbayesdm way
****
#include /pre/license.stan
data {
int<lower=1> N;
int<lower=1> T;
int<lower=1, upper=T> Tsubj[N];
int<lower=-1, upper=2> choice[N, T];
real outcome[N, T]; // no lower and upper bounds
}
transformed data {
vector[2] initV; // initial values for EV
initV = rep_vector(0.0, 2);
}
parameters {
// Declare all parameters as vectors for vectorizing
// Hyper(group)-parameters
vector[2] mu_pr;
vector<lower=0>[2] sigma;
// Subject-level raw parameters (for Matt trick)
vector[N] A_pr; // learning rate
vector[N] tau_pr; // inverse temperature
}
transformed parameters {
// subject-level parameters
vector<lower=0, upper=1>[N] A;
vector<lower=0, upper=5>[N] tau;
for (i in 1:N) {
A[i] = Phi_approx(mu_pr[1] + sigma[1] * A_pr[i]);
tau[i] = Phi_approx(mu_pr[2] + sigma[2] * tau_pr[i]) * 5;
}
}
model {
// Hyperparameters
mu_pr ~ normal(0, 1);
sigma ~ normal(0, 0.2);
// individual parameters
A_pr ~ normal(0, 1);
tau_pr ~ normal(0, 1);
// subject loop and trial loop
for (i in 1:N) {
vector[2] ev; // expected value
real PE; // prediction error
ev = initV;
for (t in 1:(Tsubj[i])) {
// compute action probabilities
choice[i, t] ~ categorical_logit(tau[i] * ev);
// prediction error
PE = outcome[i, t] - ev[choice[i, t]];
// value updating (learning)
ev[choice[i, t]] += A[i] * PE;
}
}
}
generated quantities {
// For group level parameters
real<lower=0, upper=1> mu_A;
real<lower=0, upper=5> mu_tau;
// For log likelihood calculation
real log_lik[N];
// For posterior predictive check
real y_pred[N, T];
// Set all posterior predictions to 0 (avoids NULL values)
for (i in 1:N) {
for (t in 1:T) {
y_pred[i, t] = -1;
}
}
mu_A = Phi_approx(mu_pr[1]);
mu_tau = Phi_approx(mu_pr[2]) * 5;
{ // local section, this saves time and space
for (i in 1:N) {
vector[2] ev; // expected value
real PE; // prediction error
// Initialize values
ev = initV;
log_lik[i] = 0;
for (t in 1:(Tsubj[i])) {
// compute log likelihood of current trial
log_lik[i] += categorical_logit_lpmf(choice[i, t] | tau[i] * ev);
// generate posterior prediction for current trial
y_pred[i, t] = categorical_rng(softmax(tau[i] * ev));
// prediction error
PE = outcome[i, t] - ev[choice[i, t]];
// value updating (learning)
ev[choice[i, t]] += A[i] * PE;
}
}
}
}
****