Hello everyone,
I am facing a problem with a reinforcement learning parameter that is theoretically constrained to lie between 0 and 1 (alpha, i.e., the learning rate) in a hierarchical model in rstan.
I currently use an inv_logit transformation to map the unconstrained values onto (0, 1), but I am not sure this is the way to go: because the transformation is nonlinear, the random effects pull the subject-level values towards 0.5, so the mean of the transformed parameter differs from the population-level parameter. This makes the “random” effect not truly random, as it systematically biases the transformed parameters towards 0.5.
Specifically, to conduct a precision analysis for a forthcoming project, I am not sure which value to treat as the hypothesized effect: the population-level parameter (inv_logit of population_locations[1]) or the mean of the transformed subject-level parameter (alpha).
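To make the issue concrete, here is a quick simulation in R (a rough sketch; mu and sigma below are made-up stand-ins for the population location and scale of alpha, and plogis() is R’s inv_logit):

# made-up population location and scale for alpha
mu <- 1.0
sigma <- 1.5
z <- rnorm(1e6)                           # standard-normal random effects
alpha_subjects <- plogis(mu + sigma * z)  # subject-level alphas on the (0, 1) scale
plogis(mu)             # inv_logit of the population location, approx. 0.73
mean(alpha_subjects)   # mean of the transformed parameter, pulled towards 0.5 (approx. 0.67)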
I have attached the sample code below. Thanks a lot.
Best,
Ido
data {
  // General fixed parameters for the experiment/models
  int<lower = 1> Nsubjects;
  int<lower = 1> Nblocks;
  int<lower = 1> Ntrials;
  int<lower = 1> Ntrials_per_subject[Nsubjects];
  int<lower = 4> Narms;
  int<lower = 2> Nraffle;
  // Behavioral data:
  int<lower = 0> choice[Nsubjects, Ntrials];
  int<lower = 0> reward[Nsubjects, Ntrials];
  int<lower = 0> offer1[Nsubjects, Ntrials];
  int<lower = 0> offer2[Nsubjects, Ntrials];
  int<lower = 0> selected_offer[Nsubjects, Ntrials];
  int<lower = 0> first_trial_in_block[Nsubjects, Ntrials];
}
transformed data {
  int<lower = 1> Nparameters = 2;
}
parameters {
  // population level parameters
  vector[Nparameters] population_locations;
  vector<lower=0>[Nparameters] population_scales;
  // individual level
  vector[Nsubjects] alpha_random_effect;
  vector[Nsubjects] beta_random_effect;
}
transformed parameters {
  vector<lower=0, upper=1>[Nsubjects] alpha;
  vector[Nsubjects] beta;
  // per-trial Q-value difference, stored here so the model block can use it
  matrix[Ntrials, Nsubjects] Qdiff_external = rep_matrix(0, Ntrials, Nsubjects);
  for (subject in 1:Nsubjects) {
    // internal variables
    real Qdiff;
    real PE;
    real Qval[Narms];
    // set individual parameters (non-centered parameterization)
    alpha[subject] = inv_logit(population_locations[1] + population_scales[1] * alpha_random_effect[subject]);
    beta[subject] = population_locations[2] + population_scales[2] * beta_random_effect[subject];
    // likelihood estimation
    for (trial in 1:Ntrials_per_subject[subject]) {
      // reset Q-values (first trial in block only)
      if (first_trial_in_block[subject, trial] == 1) {
        Qval = rep_array(0.5, Narms);
      }
      // difference in values between the two offered actions
      Qdiff = Qval[offer2[subject, trial]] - Qval[offer1[subject, trial]];
      // update Q-values with the reward prediction error
      PE = reward[subject, trial] - Qval[choice[subject, trial]];
      Qval[choice[subject, trial]] = Qval[choice[subject, trial]] + alpha[subject] * PE;
      // append to external variable
      Qdiff_external[trial, subject] = Qdiff;
    }
  }
}
model {
  // population level
  population_locations ~ normal(0, 2);
  population_scales ~ cauchy(0, 2);
  // individual level
  alpha_random_effect ~ std_normal();
  beta_random_effect ~ std_normal();
  // likelihood: logistic choice between the two offers based on the value difference
  for (subject in 1:Nsubjects) {
    for (trial in 1:Ntrials_per_subject[subject]) {
      target += bernoulli_logit_lpmf(selected_offer[subject, trial] | beta[subject] * Qdiff_external[trial, subject]);
    }
  }
}
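For completeness, this is roughly how I extract the two candidate quantities after fitting (a sketch, assuming the model above has been compiled and fitted with rstan into an object called fit; the parameter names match the Stan code):

library(rstan)
# fit <- stan(file = "model.stan", data = stan_data)  # hypothetical fitting call
draws <- rstan::extract(fit)
# candidate 1: inv_logit of the population location for alpha
mean(plogis(draws$population_locations[, 1]))
# candidate 2: mean of the transformed subject-level alphas
mean(draws$alpha)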