Hi @Bob_Carpenter, thank you so much for your elaborate reply. This is the complete code that I used (dataset is at Data_Used.csv - Google Drive ):
functions{
real inc_beta_inverse(real x, real p, real q) {
real a;
real acu;
real adj;
real fpu;
real g;
real h;
real iex;
int indx;
real pp;
real prev;
real qq;
real r;
real s;
real sae = -30.0;
real sq;
real t;
real tx;
real value = x;
real w;
real xin;
real y;
real yprev;
real lbeta_val = lbeta(p, q);
fpu = pow(10.0, sae);
if (is_nan(x) || is_inf(x) || x < 0 || x > 1)
reject("inc_beta_inverse: x must be finite and between 0 and 1; ", "found x = ", x);
if (p <= 0.0)
reject("inc_beta_inverse: p must be > 0; ", "found p = ", p);
if (q <= 0.0)
reject("inc_beta_inverse: q must be > 0; ", "found q = ", q);
// If the answer is easy to determine, return immediately.
if (x == 0.0)
return value;
if (x == 1.0)
return value;
// Change tail if necessary.
if (0.5 < x) {
a = 1.0 - x;
pp = q;
qq = p;
indx = 1;
} else {
a = x;
pp = p;
qq = q;
indx = 0;
}
// Calculate the initial approximation.
r = sqrt(-log(square(a)));
y = r - (2.30753 + 0.27061 * r) / (1.0 + (0.99229 + 0.04481 * r) * r);
if (1.0 < pp && 1.0 < qq) {
r = (square(y) - 3.0) / 6.0;
s = 1.0 / (pp + pp - 1.0);
t = 1.0 / (qq + qq - 1.0);
h = 2.0 / (s + t);
w = y * sqrt(h + r) / h - (t - s) * (r + 5.0 / 6.0 - 2.0 / (3.0 * h));
value = pp / (pp + qq * exp(w + w));
} else {
r = qq + qq;
t = 1.0 / (9.0 * qq);
t = r * pow(1.0 - t + y * sqrt(t), 3);
if (t <= 0.0) {
value = 1.0 - exp((log((1.0 - a) * qq) + lbeta_val) / qq);
} else {
t = (4.0 * pp + r - 2.0) / t;
if (t <= 1.0) {
value = exp((log(a * pp) + lbeta_val) / pp);
} else {
value = 1.0 - 2.0 / (t + 1.0);
}
}
}
// Solve for X by a modified Newton-Raphson method,
// using the function inc_beta.
r = 1.0 - pp;
t = 1.0 - qq;
yprev = 0.0;
sq = 1.0;
prev = 1.0;
if (value < 0.0001)
value = 0.0001;
if (0.9999 < value)
value = 0.9999;
iex = fmax(-5.0 / pp / pp - 1.0 / pow(a, 0.2) - 13.0, sae);
acu = pow(10.0, iex);
// Iteration loop.
while (1) {
y = inc_beta(pp, qq, value);
xin = value;
y = (y - a) * exp(lbeta_val + r * log(xin) + t * log(1.0 - xin));
if (y * yprev <= 0.0)
prev = fmax(sq, fpu);
g = 1.0;
while (1) {
while (1) {
adj = g * y;
sq = square(adj);
if (sq < prev) {
tx = value - adj;
if (0.0 <= tx && tx <= 1.0)
break;
}
g = g / 3.0;
}
// Check whether the current estimate is acceptable.
// The change "VALUE = TX" was suggested by Ivan Ukhov.
if (prev <= acu || y * y <= acu) {
value = tx;
if (indx == 1)
value = 1.0 - value;
return value;
}
if (tx != 0.0 && tx != 1.0)
break;
g = g / 3.0;
}
if (tx == value)
break;
value = tx;
yprev = y;
}
if (indx == 1)
value = 1.0 - value;
return value;
}
real wpkt(real a, real p){return (p^a)/((p^a+(1-p)^a)^(1/a));}
real wppr(real a, real b, real p){return exp(-b*((-log(p))^a));}
vector u_inv(real u_param, vector x){
return(200*[x[1]^(1/u_param),x[2]^(1/u_param),x[3]^(1/u_param),x[4]^(1/u_param),x[5]^(1/u_param),x[6]^(1/u_param),x[7]^(1/u_param),x[8]^(1/u_param),
x[9]^(1/u_param),x[10]^(1/u_param),x[11]^(1/u_param),x[12]^(1/u_param),x[13]^(1/u_param),x[14]^(1/u_param),x[15]^(1/u_param),x[16]^(1/u_param),
x[17]^(1/u_param),x[18]^(1/u_param),x[19]^(1/u_param),x[20]^(1/u_param),x[21]^(1/u_param),x[22]^(1/u_param),x[23]^(1/u_param),x[24]^(1/u_param),
x[25]^(1/u_param),x[26]^(1/u_param),x[27]^(1/u_param),x[28]^(1/u_param),x[29]^(1/u_param),x[30]^(1/u_param),x[31]^(1/u_param),x[32]^(1/u_param)]');
}
vector phi(real theta, real rho, vector x){
return([(1-exp(-theta*(x[1]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[2]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[3]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[4]^rho)))/(1-exp(-theta)),
(1-exp(-theta*(x[5]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[6]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[7]^rho)))/(1-exp(-theta)),(1-exp(-theta*(x[8]^rho)))/(1-exp(-theta))]');
}
vector phi_inv(real theta, real rho, vector y){
return([((-log(1-(y[1]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[2]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[3]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[4]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[5]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[6]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[7]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[8]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[9]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[10]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[11]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[12]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[13]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[14]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[15]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[16]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[17]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[18]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[19]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[20]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[21]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[22]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[23]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[24]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[25]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[26]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[27]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[28]*(1-exp(-theta)))))/theta)^(1/rho),
((-log(1-(y[29]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[30]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[31]*(1-exp(-theta)))))/theta)^(1/rho),((-log(1-(y[32]*(1-exp(-theta)))))/theta)^(1/rho)]');}
matrix beta_xy(real x, real y){
matrix[100,3] mat;
mat = rep_matrix(0,100,3);
for (i in 1:100){
mat[i,1] = (i-1)*0.01;
mat[i,2] = beta_cdf(mat[i,1]+0.005, x, y) - beta_cdf(0.005, x, y);
mat[i,3] = beta_cdf(mat[i,1]+0.005, y, x) - beta_cdf(0.005, y, x);
}
return mat;
}
vector model_ambi(real u, real wr, real theta, real rho, real waa, real wab, real aBR, real bBR, real aGP, real bGP){
vector [8] util_ambl = [0^u/200^u,0^u/200^u,0^u/200^u,0^u/200^u,50^u/200^u,50^u/200^u,25^u/200^u,25^u/200^u]';
vector [8] util_ambu = [200^u/200^u,150^u/200^u,100^u/200^u,50^u/200^u,200^u/200^u,150^u/200^u,125^u/200^u,75^u/200^u]';
vector[32] ambiguous;
vector[32] sum_amb;
matrix[100,3] BR;
matrix[100,3] GP;
ambiguous = rep_vector(0,32);
sum_amb = rep_vector(0,32);
BR = beta_xy(aBR, bBR);
GP = beta_xy(aGP, bGP);
// print("paramsvalues")
// print(u, wr, theta, rho, waa, wab, aBR, bBR, aGP, bGP);
for (k in 2:98) {
real weight = wpkt(wr,(100-k)*0.01);
sum_amb[1:8] = sum_amb[1:8] + phi(theta,rho,util_ambu*weight+util_ambl*(1-weight))*(wppr(waa,wab,BR[k+1,3])-wppr(waa,wab,BR[k,3]));
sum_amb[9:16] = sum_amb[9:16] + phi(theta,rho,util_ambu*weight+util_ambl*(1-weight))*(wppr(waa,wab,BR[k+1,2])-wppr(waa,wab,BR[k,2]));
sum_amb[17:24] = sum_amb[17:24] + phi(theta,rho,util_ambu*weight+util_ambl*(1-weight))*(wppr(waa,wab,GP[k+1,3])-wppr(waa,wab,GP[k,3]));
sum_amb[25:32] = sum_amb[25:32] + phi(theta,rho,util_ambu*weight+util_ambl*(1-weight))*(wppr(waa,wab,GP[k+1,2])-wppr(waa,wab,GP[k,2]));
}
ambiguous = u_inv(u, phi_inv(theta,rho,sum_amb));
return ambiguous;
}
}
// The input data is a vector 'y' of length 'N'.
data {
int<lower=0> N; // number of individuals = 80
// int<lower=0> M; // number of tasks = 51
// matrix[N, M] Y; // data
matrix[N, 51] Y; // data
// matrix[N, 8] prior_estimates; // prior estimates
}
// The parameters accepted by the model.
parameters {
// Individual level params ----
real<lower=0, upper=10> u[N];
real<lower=0, upper=1> wr[N];
real<lower=0, upper=100> sigma_r[N];
real<lower=1, upper=100> aBR[N];
real<lower=1, upper=100> bBR[N];
real<lower=1, upper=100> aGP[N];
real<lower=1, upper=100> bGP[N];
real<lower=0.001, upper=1> sigma_e[N];
real<lower=-100, upper=100> theta[N];
real<lower=0, upper=10> rho[N];
real<lower=0, upper=1> waa[N];
real<lower=0, upper=3> wab[N];
real<lower=0, upper=100> sigma_a[N];
// Mu params ----
real<lower=0, upper=2> mu_u;
real<lower=0, upper=1> mu_wr;
real<lower=0, upper=30> mu_sigma_r;
real<lower=1, upper=20> mu_aBR;
real<lower=1, upper=20> mu_bBR;
real<lower=1, upper=20> mu_aGP;
real<lower=1, upper=20> mu_bGP;
real<lower=0, upper=0.2> mu_sigma_e;
real<lower=0, upper=50> mu_theta;
real<lower=0, upper=10> mu_rho;
real<lower=0, upper=1> mu_waa;
real<lower=0, upper=3> mu_wab;
real<lower=0, upper=30> mu_sigma_a;
// Sigma2 params ----
real<lower=0> sigma2_u;
real<lower=0> sigma2_wr;
real<lower=0> sigma2_sigma_r;
real<lower=0, upper=100> sigma2_aBR;
real<lower=0, upper=100> sigma2_bBR;
real<lower=0, upper=100> sigma2_aGP;
real<lower=0, upper=100> sigma2_bGP;
real<lower=0, upper=0.02> sigma2_sigma_e;
real<lower=0> sigma2_theta;
real<lower=0> sigma2_rho;
real<lower=0> sigma2_waa;
real<lower=0> sigma2_wab;
real<lower=0> sigma2_sigma_a;
}
transformed parameters {
real<lower=0> sigma_u;
real<lower=0> sigma_wr;
real<lower=0> sigma_sigma_r;
real<lower=0> sigma_aBR;
real<lower=0> sigma_bBR;
real<lower=0> sigma_aGP;
real<lower=0> sigma_bGP;
real<lower=0> sigma_sigma_e;
real<lower=0> sigma_theta;
real<lower=0> sigma_rho;
real<lower=0> sigma_waa;
real<lower=0> sigma_wab;
real<lower=0> sigma_sigma_a;
// real<lower=0, upper=1> meanBR[N];
// real<lower=0, upper=1> meanGP[N];
// real<lower=0, upper=1> sigmaBR[N];
// real<lower=0, upper=1> sigmaGP[N];
sigma_u = sqrt(sigma2_u);
sigma_wr = sqrt(sigma2_wr);
sigma_sigma_r = sqrt(sigma2_sigma_r);
sigma_aBR = sqrt(sigma2_aBR);
sigma_bBR = sqrt(sigma2_bBR);
sigma_aGP = sqrt(sigma2_aGP);
sigma_bGP = sqrt(sigma2_bGP);
sigma_sigma_e = sqrt(sigma2_sigma_e);
sigma_theta = sqrt(sigma2_theta);
sigma_rho = sqrt(sigma2_rho);
sigma_waa = sqrt(sigma2_waa);
sigma_wab = sqrt(sigma2_wab);
sigma_sigma_a = sqrt(sigma2_sigma_a);
// meanBR[N] = aBR[N]/(aBR[N] + bBR[N]);
// meanGP[N] = aGP[N]/(aGP[N] + bGP[N]);
// sigmaBR[N] = sqrt(aBR[N]*bBR[N]/((aBR[N] + bBR[N])*(aBR[N] + bBR[N])*(aBR[N] + bBR[N] + 1)));
// sigmaGP[N] = sqrt(aGP[N]*bGP[N]/((aGP[N] + bGP[N])*(aGP[N] + bGP[N])*(aGP[N] + bGP[N] + 1)));
// vector[K] params = L + (U - L) .* params_raw;
}
// The model to be estimated.
model {
// priors
// mu ----
mu_u ~ normal(0.7, 0.4);
mu_wr ~ normal(0.7, 0.5);
mu_sigma_r ~ normal(25, 20);
mu_aBR ~ normal(6, 5);
mu_bBR ~ normal(6, 5);
mu_aGP ~ normal(6, 5);
mu_bGP ~ normal(6, 5);
mu_sigma_e ~ normal(0.1, 0.05);
mu_theta ~ normal(10, 100);
mu_rho ~ normal(3, 100);
mu_waa ~ normal(0.8, 0.5);
mu_wab ~ normal(1.5, 1);
mu_sigma_a ~ normal(20, 20);
// sigma2 ----
sigma2_u ~ normal(0.1,0.05);
sigma2_wr ~ normal(0.1,0.05);
sigma2_sigma_r ~ normal(400,400);
sigma2_aBR ~ normal(100, 100);
sigma2_bBR ~ normal(100, 100);
sigma2_aGP ~ normal(100, 100);
sigma2_bGP ~ normal(100, 100);
sigma2_sigma_e ~ normal(0.01, 0.01);
sigma2_theta ~ normal(100,100);
sigma2_rho ~ normal(10,10);
sigma2_waa ~ normal(0.1, 0.05);
sigma2_wab ~ normal(0.4, 0.2);
sigma2_sigma_a ~ normal(400, 400);
// individual level ----
u ~ normal(mu_u, sigma_u);
wr ~ normal(mu_wr, sigma_wr);
sigma_r ~ normal(mu_sigma_r, sigma_sigma_r);
aBR ~ normal(mu_aBR, sigma_aBR);
bBR ~ normal(mu_bBR, sigma_bBR);
aGP ~ normal(mu_aGP, sigma_aGP);
bGP ~ normal(mu_bGP, sigma_bGP);
sigma_e ~ normal(mu_sigma_e, sigma_sigma_e);
theta ~ normal(mu_theta,sigma_theta);
rho ~ normal(mu_rho,sigma_rho);
waa ~ normal(mu_waa, sigma_waa);
wab ~ normal(mu_wab, sigma_wab);
sigma_a ~ normal(mu_sigma_a, sigma_sigma_a);
// likelihood
for (i in 1:N){
int counter_risky = 0;
int counter_ambi = 0;
int counter_exch = 0;
vector[9] cache_risky = [((200^u[i])*wpkt(wr[i],0.1) + (0^u[i])*(1-wpkt(wr[i],0.1)))^(1/u[i]),
((200^u[i])*wpkt(wr[i],0.3) + (0^u[i])*(1-wpkt(wr[i],0.3)))^(1/u[i]), ((200^u[i])*wpkt(wr[i],0.5) + (0^u[i])*(1-wpkt(wr[i],0.5)))^(1/u[i]),
((200^u[i])*wpkt(wr[i],0.7) + (0^u[i])*(1-wpkt(wr[i],0.7)))^(1/u[i]), ((200^u[i])*wpkt(wr[i],0.9) + (0^u[i])*(1-wpkt(wr[i],0.9)))^(1/u[i]),
((160^u[i])*wpkt(wr[i],0.5) + (20^u[i])*(1-wpkt(wr[i],0.5)))^(1/u[i]), ((120^u[i])*wpkt(wr[i],0.5) + (40^u[i])*(1-wpkt(wr[i],0.5)))^(1/u[i]),
((100^u[i])*wpkt(wr[i],0.5) + (0^u[i])*(1-wpkt(wr[i],0.5)))^(1/u[i]), ((60^u[i])*wpkt(wr[i],0.5) + (0^u[i])*(1-wpkt(wr[i],0.5)))^(1/u[i])]';
vector[32] cache_ambi = model_ambi(u[i],wr[i],theta[i],rho[i],waa[i],wab[i],aBR[i],bBR[i],aGP[i],bGP[i]);
vector[10] cache_exch = [inc_beta_inverse((beta_cdf(0.6,aBR[i],bBR[i])*0.5),aBR[i],bBR[i]),
inc_beta_inverse((beta_cdf(0.6,aBR[i],bBR[i])+(1-beta_cdf(0.6,aBR[i],bBR[i]))*0.5),aBR[i],bBR[i]),
inc_beta_inverse((beta_cdf(0.8,aBR[i],bBR[i])*0.5),aBR[i],bBR[i]),
inc_beta_inverse((beta_cdf(0.4,aBR[i],bBR[i])+(1-beta_cdf(0.4,aBR[i],bBR[i]))*0.5),aBR[i],bBR[i]),
inc_beta_inverse(0.5,aBR[i],bBR[i]),
inc_beta_inverse((beta_cdf(0.8,aGP[i],bGP[i])*0.5),aGP[i],bGP[i]),
inc_beta_inverse((beta_cdf(0.8,aGP[i],bGP[i])+(1-beta_cdf(0.8,aGP[i],bGP[i]))*0.5),aGP[i],bGP[i]),
inc_beta_inverse((beta_cdf(0.9,aGP[i],bGP[i])*0.5),aGP[i],bGP[i]),
inc_beta_inverse((beta_cdf(0.6,aGP[i],bGP[i])+(1-beta_cdf(0.6,aGP[i],bGP[i]))*0.5),aGP[i],bGP[i]),
inc_beta_inverse(0.5,aGP[i],bGP[i])]';
for (ir in 1:9) {
counter_risky += 1;
Y[ i , ir ] ~ normal(cache_risky[counter_risky], sigma_r[i]);
}
for (ia in 10:41) {
counter_ambi += 1;
Y[ i , ia ] ~ normal(cache_ambi[counter_ambi], sigma_a[i]);
}
for (ie in 42:51) {
counter_exch += 1;
Y[ i , ie ] ~ normal(cache_exch[counter_exch], sigma_e[i]);
}
}
}
By “work on the log scale”, do you mean to work with log transformations of positive constrained parameters rather than the parameters themselves?
Indeed, mu_sigma_r and sigma_sigma_r are also parameters. To be specific, I have a list of parameters that capture preferences of every individual (total of 80 individuals): u, w_r, sigma_r, aBR, … , wab, sigma_a. I assume they are independent and drawn from population-level normal distributions (mu_u, sigma_u), (mu_wr, sigma_wr), (mu_sigma_r, sigma_sigma_r), etc. The priors of these parameters are also normal. I have a good idea of where theese parameters might be, so I could make my priors more informative; as it is, I also impose stronger bounds on these population-level parameters. Nevertheless, I found a few Stan articles where non-centered parametrizations are recommended in hierarchial models. Is your suggestion on the use of offset and multiplier equivalent to declaring aBR_raw as a parameter and aBR as a transformed parameter as follows:
aBR = mu_aBR + sigma_aBR * aBR_raw;
I shall look into reformulation of beta distributions as logistic distributions- I see that . In my case, the beta distribution is used to model beliefs about the composition of an unknown urn. For example, an urn has 100 balls of 2 colours; there is partial information that allows belief formation about the urn. The urn could have (0 balls of colour 1, 100 of colour 2), (1 of colour 1, 99 of colour 2), … , (99 of colour 1, 1 of colour 2), (100 of colour 1, 0 of colour 2). The beta allows for easy discretization over the possible compositions.
Lastly, while the model is computationally complex, the estimations seem robust enough. The data collected has also been tailored to meet certain requirements- such as neatly capturing the curvature of different model components. Thus, I’m reasonably confident that convergence won’t be an issue. Would you suggest modifying default adapt_delta and max_treedepth to trade-off speed with convergence?