Hey folks !
I recently did some non-centered reparametrization on some of my models, and at least for my models who sample the parameters from independent normals this worked out very well (faster, no divergences etc.). However, I have now implemented the same non-centered reparametrization in the multivariate version of the model (which worked out ok but could be better asit has some divergences and convergence problems). The non-centered model works though, but not as well as expected. I think this is more about some errors I made as about the reparamtrization itself. So I have two questions on this:
1.) I used the reference manual to implement the cholesky non-centered, but I’m just not sure whether this works as intended, because I estimate the correlation matrix as well as sigma simultanioulsy. If I would assume uncorrelated parameters and just surpass an identity matrix as correlation matrix, would this be a valid method to simplyfy the model in this case? And: Is this non-centered implementation “correct” so far?
2.) How could I provide initial values in this case, that actually satisfy the sampler to be positiv ? As One Parameter is on log_scale, this should have boudaries from -Inf to Inf. Sampling all Parameters from the same std_normal is a problem here. Would it be “effiecient” or reasonable to outsource this parameter from the MVN and sample it independently?
I maybe I can find some ideas here, help and opinions are appreciated ! Thanks for reading and your Opinions !
*J
Here are model codes for the non-centered and centered model:
Non-Centered:
data {
int <lower=0> N; // number rownumber
int <lower=0> K; // categories
int <lower=0> J; // Dims of Cov Matrix
int <lower=0> Con; // number of FT categories
int R[K]; // number of responses per category
int count[N*Con,K]; // observed data
real scale_b; // set scaling for background noise
vector[Con] Freetime; // freetime conditions after distractor encoding (e.g. 500 ms, 1000 ms or 1500 ms)
int retrievals;
matrix[N*Con,2] d_weight; // Weights for Response Category inbalances
}
parameters {
// Defining vector for hyper and subject parameters
corr_matrix[J] Omega;
vector<lower=0>[J] sigma;
vector [J] hyper_pars;
vector[J] theta[N];
}
transformed parameters{
// Transform f Parameter
vector[J] subj_pars[N];
matrix[J, J] L;
real f[N] = inv_logit(theta[,3]);
real mu_f = inv_logit(hyper_pars[3]);
L = cholesky_decompose(Omega);
for (i in 1:N)
{
subj_pars[i,] = hyper_pars + sigma .* (L * theta[i,]);
}
// activations
real acts_IIP[N*Con];
real acts_IOP[N*Con];
real acts_DIP[N*Con];
real acts_DIOP[N*Con];
real acts_NPL[N*Con];
// probabilities
vector[K] probs[N*Con];
real SummedActs[N*Con];
// loop over subjects and conditions to compute activations and probabilites
for (i in 1:N){ // for each subject
for(j in 1:Con) {
acts_IIP[j + (i-1)*Con] = scale_b + (1+ subj_pars[i,4]*Freetime[j])* (subj_pars[i,1] + subj_pars[i,2]); // Item in Position
acts_IOP[j + (i-1)*Con] = scale_b + (1+ subj_pars[i,4]*Freetime[j])* subj_pars[i,2]; // Item in Other Position
acts_DIP[j + (i-1)*Con] = scale_b + f[i]*((exp(-subj_pars[i,5]*Freetime[j])*subj_pars[i,1]) + subj_pars[i,2]); // Distractor in Position
acts_DIOP[j + (i-1)*Con] = scale_b + f[i]*subj_pars[i,2]; // Distractor in other Position
acts_NPL[j + (i-1)*Con] = scale_b; // non presented Lure
SummedActs[j + (i-1)*Con] = R[1] * acts_IIP[j + (i-1)*Con] + R[2] * acts_IOP[j + (i-1)*Con] + d_weight[j + (i-1)*Con,1] * acts_DIP[j + (i-1)*Con]+ // account for different numbers auf DIP / DIOP
d_weight[j + (i-1)*Con, 2] * acts_DIOP[j + (i-1)*Con]+ R[5] * acts_NPL[j + (i-1)*Con];
probs[j + (i-1)*Con,1] = (R[1] * acts_IIP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,2] = (R[2] * acts_IOP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,3] = (d_weight[j + (i-1)*Con,1] * acts_DIP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,4] = (d_weight[j + (i-1)*Con, 2] * acts_DIOP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,5] = (R[5] * acts_NPL[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
}
}
}
model {
// priors for hyper parameters
hyper_pars[1] ~ normal(20,10); // c
hyper_pars[2] ~ normal(2,10); // a
hyper_pars[3] ~ normal(0,10); // f
hyper_pars[4] ~ normal(1,10); // EE
hyper_pars[5] ~ normal(1,10); // r
//
Omega ~ lkj_corr(2);
sigma ~ gamma(1,0.01);
// Loop over subjects
for (i in 1:N)
{
theta[i,] ~ normal(0,1);
}
for (j in 1:Con) {
for (i in 1:N) {
// draw data from probabilities determined by MMM parms
count[j + (i-1)*Con,] ~ multinomial(probs[j + (i-1)*Con,]);
}
}
}
Centered:
data {
int <lower=0> N; // number rownumber
int <lower=0> K; // categories
int <lower=0> J; // Dims of Cov Matrix
int <lower=0> Con; // number of categories
int R[K]; // number of responses per category
int count[N*Con,K]; // observed data
real scale_b; // set scaling for background noise
vector[Con] Freetime ; // freetime conditions after distractor encoding (e.g. 500 ms, 1000 ms or 1500 ms)
}
parameters {
// Defining vector for hyper and subject parameters
corr_matrix[J] Omega;
vector<lower=0>[J] sigma;
vector [J] hyper_pars;
vector [J] subj_pars[N];
}
transformed parameters{
// Transform f Parameter
real f[N] = inv_logit(subj_pars[,3]);
real mu_f = inv_logit(hyper_pars[3]);
// activations
real acts_IIP[N*Con];
real acts_IOP[N*Con];
real acts_DIP[N*Con];
real acts_DIOP[N*Con];
real acts_NPL[N*Con];
// probabilities
vector[K] probs[N*Con];
real SummedActs[N*Con];
// loop over subjects and conditions to compute activations and probabilites
for (i in 1:N){ // for each subject
for(j in 1:Con) {
acts_IIP[j + (i-1)*Con] = scale_b + (1+ subj_pars[i,4]*Freetime[j])*( subj_pars[i,2] + subj_pars[i,1]); // Item in Position
acts_IOP[j + (i-1)*Con] = scale_b + (1+ subj_pars[i,4]*Freetime[j])* subj_pars[i,2]; // Item in Other Position
acts_DIP[j + (i-1)*Con] = scale_b + exp(-subj_pars[i,5]*Freetime[j])* subj_pars[i,3]*( subj_pars[i,2]+ subj_pars[i,1]);// Distractor in Position
acts_DIOP[j + (i-1)*Con] = scale_b + exp(-subj_pars[i,5]*Freetime[j])* subj_pars[i,3]* subj_pars[i,2]; // Distractor in other Position
acts_NPL[j + (i-1)*Con] = scale_b; // non presented Lure
SummedActs[j + (i-1)*Con] = R[1] * acts_IIP[j + (i-1)*Con] + R[2] * acts_IOP[j + (i-1)*Con] + R[3] * acts_DIP[j + (i-1)*Con]+
R[4] * acts_DIOP[j + (i-1)*Con]+ R[5] * acts_NPL[j + (i-1)*Con];
probs[j + (i-1)*Con,1] = (R[1] * acts_IIP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,2] = (R[2] * acts_IOP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,3] = (R[3] * acts_DIP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,4] = (R[4] * acts_DIOP[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
probs[j + (i-1)*Con,5] = (R[5] * acts_NPL[j + (i-1)*Con]) ./ (SummedActs[j + (i-1)*Con]);
}
}
}
model {
// priors for hyper parameters
hyper_pars[1] ~ normal(20,10); // c
hyper_pars[2] ~ normal(2,10); // a
hyper_pars[3] ~ normal(0,10); //f
hyper_pars[4] ~ normal(1,10); // EE
hyper_pars[5] ~ normal(1,10); // r
Omega ~ lkj_corr(2);
sigma ~ gamma(1,0.01);
// Loop over subjects
subj_pars[,] ~ multi_normal(hyper_pars,quad_form_diag(Omega, sigma));
for (j in 1:Con) {
for (i in 1:N) {
// draw data from probabilities determined by MMM parms
count[j + (i-1)*Con,] ~ multinomial(probs[j + (i-1)*Con,]);
}
}
}