Hey folks,
I have a question regarding the implementation of certain link function outside of the brms framework, namely for cognitive models. I got a relatively simple hierachical cognitive model with 3 parameters:
functions{
// flatten_lower_tri: function that returns the lower-tri of a matrix, flattened to a vector
vector flatten_lower_tri(matrix mat) {
int n_cols = cols(mat) ;
int n_uniq = (n_cols * (n_cols - 1)) %/% 2;
vector[n_uniq] out ;
int i = 1;
for(c in 1:(n_cols-1)){
for(r in (c+1):n_cols){
out[i] = mat[r,c];
i += 1;
}
}
return(out) ;
}
}
data {
int <lower=0> N; // number rownumber
int <lower=0> K; // categories
int <lower=0> J; // Dims of Cov Matrix
int R[K]; // number of responses per category
int count[N,K]; // observed data
real scale_b; // set scaling for background noise
int retrievals;
}
parameters {
// Defining vector for hyper and subject parameters
cholesky_factor_corr[J] L_Omega;
vector<lower=0>[J] sigma;
vector[J] hyper_pars;
matrix[J,N] theta;
}
transformed parameters {
// non-centered multivariate
matrix[J,N] subj_pars = (
diag_pre_multiply( sigma, L_Omega )
* theta
+ rep_matrix(hyper_pars,N)
) ;
// Transform f Parameter
real mu_f = inv_logit(hyper_pars[3]);
row_vector[N] f = inv_logit(subj_pars[3,]);
// activations
real acts_IIP[N];
real acts_IOP[N];
real acts_DIP[N];
real acts_DIOP[N];
real acts_NPL[N];
// probabilities
vector[K] probs[N];
real SummedActs[N];
// loop over subjects and conditions to compute activations and probabilites
for (i in 1:N){ // for each subject
acts_IIP[i] = scale_b + subj_pars[1,i] + subj_pars[2,i]; // Item in Position
acts_IOP[i] = scale_b + subj_pars[2,i]; // Item in Other Position
acts_DIP[i] = scale_b + f[i]*(subj_pars[1,i] + subj_pars[2,i]); // Distractor in Position
acts_DIOP[i] = scale_b + f[i]*subj_pars[2,i]; // Distractor in other Position
acts_NPL[i] = scale_b; // non presented Lure
SummedActs[i] = R[1] * acts_IIP[i] + R[2] * acts_IOP[i] + R[3] * acts_DIP[i]+ // account for different numbers auf DIP / DIOP
R[4] * acts_DIOP[i]+ R[5] * acts_NPL[i];
probs[i,1] = (R[1] * acts_IIP[i]) ./ (SummedActs[i]);
probs[i,2] = (R[2] * acts_IOP[i]) ./ (SummedActs[i]);
probs[i,3] = (R[3] * acts_DIP[i]) ./ (SummedActs[i]);
probs[i,4] = (R[4] * acts_DIOP[i]) ./ (SummedActs[i]);
probs[i,5] = (R[5] * acts_NPL[i]) ./ (SummedActs[i]);
}
}
model {
// priors for hyper parameters
target += normal_lpdf(hyper_pars[1] | 20, 10); // c
target += normal_lpdf(hyper_pars[2] | 2, 10); // a
target += normal_lpdf(hyper_pars[3] | 0, 10); // f
// priots for covariances and variances
L_Omega ~ lkj_corr_cholesky(2);
sigma ~ gamma(1,0.01);
// Loop over subjects
for (i in 1:N)
{
theta[,i] ~ normal(0,1);
}
for (i in 1:N) {
// draw data from probabilities determined by MMM parms
target += multinomial_lpmf(count[i] | probs[i]);
}
}
generated quantities{
vector[(J*(J-1))%/%2] cor_mat_lower_tri;
int count_rep[N,K];
vector[N] log_lik;
cor_mat_lower_tri = flatten_lower_tri(multiply_lower_tri_self_transpose(L_Omega));
for (i in 1:N)
{
count_rep[i,] = multinomial_rng(probs[i], retrievals);
log_lik[i] = multinomial_lpmf(count_rep[i,] | probs[i,]);
}
}
The model uses a inverse logit transformation for the f parameter, as it is supposed to be between 0 and 1. This works fine. Now I wanted to map the other parameters to a strictly positive space, without truncating anything. So I tried the a log link transformation with standard normal priors:
....
transformed parameters {
// non-centered multivariate
matrix[J,N] subj_pars = (
diag_pre_multiply( sigma, L_Omega )
* theta_raw
+ rep_matrix(hyper_pars,N)
) ;
// Transform f Parameter
real mu_f = inv_logit(hyper_pars[3]);
row_vector[N] f = inv_logit(subj_pars[3,]);
// map to positive space with exp transform (log-link)
row_vector[N] c = exp(subj_pars[1,]);
row_vector[N] a = exp(subj_pars[2,]);
// activations
real acts_IIP[N];
real acts_IOP[N];
real acts_DIP[N];
real acts_DIOP[N];
real acts_NPL[N];
// probabilities
vector[K] probs[N];
real SummedActs[N];
// loop over subjects and conditions to compute activations and probabilites
for (i in 1:N){ // for each subject
acts_IIP[i] = scale_b + c[i] + a[i]; // Item in Position
acts_IOP[i] = scale_b + a[i]; // Item in Other Position
acts_DIP[i] = scale_b + f[i]*(c[i] + a[i]); // Distractor in Position
acts_DIOP[i] = scale_b + f[i]*a[i]; // Distractor in other Position
acts_NPL[i] = scale_b; // non presented Lure
SummedActs[i] = R[1] * acts_IIP[i] + R[2] * acts_IOP[i] + R[3] * acts_DIP[i]+ // account for different numbers auf DIP / DIOP
R[4] * acts_DIOP[i]+ R[5] * acts_NPL[i];
probs[i,1] = (R[1] * acts_IIP[i]) ./ (SummedActs[i]);
probs[i,2] = (R[2] * acts_IOP[i]) ./ (SummedActs[i]);
probs[i,3] = (R[3] * acts_DIP[i]) ./ (SummedActs[i]);
probs[i,4] = (R[4] * acts_DIOP[i]) ./ (SummedActs[i]);
probs[i,5] = (R[5] * acts_NPL[i]) ./ (SummedActs[i]);
}
}
model {
// priors for hyper parameters
target += normal_lpdf(hyper_pars[1] | 0, 1); // c
target += normal_lpdf(hyper_pars[2] | 0, 1); // a
target += normal_lpdf(hyper_pars[3] | 0, 1); // f
// priots for covariances and variances
L_Omega ~ lkj_corr_cholesky(2);
sigma ~ gamma(1,0.01);
// Loop over subjects
for (i in 1:N)
{
theta_raw[,i] ~ normal(0,1);
}
....
So my questions are:
1.) Did I implement the log-link correctly ? Or do I have to use addtional scaling or other transformation first ?
2.) Because of the log-link, the priors I used produced extrem values which resulted in posteriors, which had heavy tails of the subject level parameters which seem kind of unbounded (because of the exponentiation):
I tried to adress this issues by using a standard normal prior for the hyper parameters, as I’m using a non-centered parametrization to calculate the subject level parameters. How could I do this better ? I also tried a softplus link, but this did reduce the posterior range.
Am I missing something here, or did I just implement the link function in the wrong way ?
greetings and thx for your answer !