Hi everyone,

Summary: I have a code below that I tried to make as efficient as possible. In previous posts, I have benefitted from tips on stability (using log-scale) and efficiency (reparametrizing, vectorizing). I have also optimized containers (array vs vector, MxN matrix vs NxM matrix, etc) using the Stan guide. Despite all this, running 4 chains in parallel (including generating a log_lik vector for LOO validation) took 3 hours for the first 15 iterations (and I have a total of 2000 including warm-up). Usually, I observed that linear projections over-estimate total run-time, but I’m looking at 1-2 weeks for relatively short chains of 2000 iterations each.

Is there any way to improve the efficiency further?

A few details: The model I’m trying to fit is the “smooth ambiguity model”, a popular model of decision-making in the economics literature. It involves integrating over all possible beliefs (for example, in a bicolour, 100-ball urn, there are 99 possible beliefs about the urn’s composition) — I model beliefs about the urn’s composition as a beta distribution. The use of inc_beta and inv_inc_beta could be a factor slowing the computations. However, since I need the area under the beta distribution, there seems to be no alternative.

Concerns were also raised about use of the centered parametrization; with a few partial estimations, I have seen that this is not a major issue in my case. I have 51 data points each for 80 individuals to (hierarchically) estimate 13 parameters. Non-centered approach tends to take even longer to run, and I get reliable estimates for both mu and sigma even with a centered approach.

``````functions{

// Tversky–Kahneman probability weighting function
//   w(p) = p^a / (p^a + (1-p)^a)^(1/a),
// evaluated entirely on the log scale for numerical stability
// (lmultiply(a, p) = a*log(p) with the 0*log(0) case handled;
// log_sum_exp gives log(p^a + (1-p)^a) without overflow).
real wpkt(real a, real p){
real a_log_p = lmultiply(a, p);
return exp(a_log_p - inv(a) * log_sum_exp(a_log_p, a * log1m(p)));
}

// Prelec two-parameter probability weighting function
//   w(p) = exp(-b * (-log p)^a),
// with the endpoints p = 0 and p = 1 returned unchanged (the formula
// is undefined at p = 0 and the fixed points must hold exactly).
real wppr(real a, real b, real p){
if (p == 0 || p == 1) {
return p;
}
return exp(-b * pow(-log(p), a));
}

// Inverse power-utility: maps normalized utilities in [0,1] back to money,
//   u^{-1}(x) = 200 * x^(1/u_param).
// The factor 200 is the maximum payoff of the experiment's stake scale —
// NOTE(review): hard-coded; confirm it matches the payoff units in the data.
vector u_inv(real u_param, vector x){
return 200 * pow(x, inv(u_param));
}

// Smooth-ambiguity second-order value function
//   phi(x) = (1 - exp(-theta * x^rho)) / (1 - exp(-theta)),
// computed via log1m_exp for stability.
// NOTE(review): log1m_exp(-theta) requires theta > 0, but theta is declared
// unconstrained in the parameters block — a negative draw yields NaN and a
// rejected proposal. Confirm whether theta should carry <lower=0>.
vector phi(real theta, real rho, vector x){
return exp(log1m_exp(-theta * pow(x, rho)) - log1m_exp(-theta));
}

// Inverse of phi(): recovers x from y = phi(x),
//   x = ( -log(1 - y*(1 - exp(-theta))) / theta )^(1/rho),
// with the inner 1 - y*(1 - exp(-theta)) evaluated on the log scale.
// NOTE(review): same theta > 0 domain requirement as phi() — verify.
vector phi_inv(real theta, real rho, vector y){
return pow((-log1m_exp(log(y) + log1m_exp(-theta))/theta), inv(rho));
}

// Predicted certainty equivalents for the 9 risky prospects:
// five single-outcome gambles at probabilities .1/.3/.5/.7/.9, then four
// two-outcome gambles whose rank-dependent utilities mix at the w(.5) weight.
// All entries are mapped back to money via u_inv.
vector model_risky(real ui, real wri){
real w_half = wpkt(wri, 0.5);
real w_half_c = 1 - w_half;
vector[9] v;
v[1] = wpkt(wri, 0.1);
v[2] = wpkt(wri, 0.3);
v[3] = w_half;
v[4] = wpkt(wri, 0.7);
v[5] = wpkt(wri, 0.9);
v[6] = (0.8^ui)*w_half + (0.1^ui)*w_half_c;
v[7] = (0.6^ui)*w_half + (0.2^ui)*w_half_c;
v[8] = (0.5^ui)*w_half;
v[9] = (0.3^ui)*w_half;
return u_inv(ui, v);
}

// Predicted answers to the 10 exchangeability questions: conditional
// medians of the two beta belief distributions (BR urn and GP urn),
// obtained by inverting the incomplete beta function at the midpoint of
// the relevant CDF interval. Repeated inc_beta evaluations are cached.
vector model_exch(real aBRi, real bBRi, real aGPi, real bGPi){
real cBR4 = inc_beta(aBRi, bBRi, 0.4);
real cBR6 = inc_beta(aBRi, bBRi, 0.6);
real cGP6 = inc_beta(aGPi, bGPi, 0.6);
real cGP8 = inc_beta(aGPi, bGPi, 0.8);
vector[10] med;
// BR urn: medians below .6, above .6, below .8, above .4, unconditional.
med[1] = inv_inc_beta(aBRi, bBRi, (cBR6*0.5));
med[2] = inv_inc_beta(aBRi, bBRi, (cBR6+(1-cBR6)*0.5));
med[3] = inv_inc_beta(aBRi, bBRi, (inc_beta(aBRi, bBRi, 0.8)*0.5));
med[4] = inv_inc_beta(aBRi, bBRi, (cBR4+(1-cBR4)*0.5));
med[5] = inv_inc_beta(aBRi, bBRi, 0.5);
// GP urn: same pattern at the .8/.9/.6 cut points.
med[6] = inv_inc_beta(aGPi, bGPi, (cGP8*0.5));
med[7] = inv_inc_beta(aGPi, bGPi, (cGP8+(1-cGP8)*0.5));
med[8] = inv_inc_beta(aGPi, bGPi, (inc_beta(aGPi, bGPi, 0.9)*0.5));
med[9] = inv_inc_beta(aGPi, bGPi, (cGP6+(1-cGP6)*0.5));
med[10] = inv_inc_beta(aGPi, bGPi, 0.5);
return med;
}

// Discretized, probability-weighted belief measure over urn compositions.
// Evaluates the beta(x, y) CDF on a 100-point midpoint grid of (0,1),
// applies the Prelec weighting wppr from each end of the support, and
// returns the 99 first differences:
//   column 1 — weights built from the CDF measured upward from the lower tail;
//   column 2 — weights built downward from the upper tail.
// Changes vs. original: the 100-element hand-typed grid is generated with
// linspaced_vector; the redundant rep_matrix zero-fills are dropped (every
// entry is assigned before use); the two wppr loops are merged; the
// difference loops are replaced by vectorized slices. inc_beta is still
// evaluated once per grid point — that cost is irreducible here.
matrix beta_xy(real waai, real wabi, real x, real y){
matrix[99,2] dmat;
vector[100] cdf;
vector[100] w_up;
vector[100] w_dn;
// Midpoints 0.005, 0.015, ..., 0.995.
vector[100] o2h = linspaced_vector(100, 0.005, 0.995);
for (i in 1:100) {
cdf[i] = inc_beta(x, y, o2h[i]);
}
real base = cdf[1];
real altbase = cdf[100];
for (i in 1:100) {
w_up[i] = wppr(waai, wabi, (cdf[i] - base));
w_dn[i] = wppr(waai, wabi, (altbase - cdf[101-i]));
}
// First differences of the weighted measures (vectorized).
dmat[, 1] = w_up[2:100] - w_up[1:99];
dmat[, 2] = w_dn[2:100] - w_dn[1:99];
return dmat;
}

// Predicted certainty equivalents for the 32 ambiguous prospects under the
// smooth ambiguity model: for each of 99 candidate compositions, compute the
// rank-dependent utility of each of 8 prospects, average over the weighted
// belief measures from beta_xy (BR and GP urns, each from both tails), then
// invert phi and the utility function.
// Changes vs. original: redundant rep_* zero-fills removed (all entries are
// assigned before use); the 99-element descending-grid literal is replaced
// by the closed form (100-k)/100; the loop-invariant utility spread
// (utili_ambu - utili_ambl) is hoisted so each grid point costs one
// vector multiply-add instead of two scales plus an add.
vector model_ambi(real ui, real wri, real thetai, real rhoi, real waai, real wabi, real aBRi, real bBRi, real aGPi, real bGPi){
real ui125 = 0.125^ui;
real ui250 = 0.25^ui;
real ui750 = 0.75^ui;
vector[8] utili_ambl = [0, 0, 0, 0, ui250, ui250, ui125, ui125]';
vector[8] utili_ambu = [1, ui750, 0.5^ui, ui250, 1, ui750, 0.625^ui, 0.375^ui]';
vector[8] du = utili_ambu - utili_ambl;   // loop-invariant utility spread
matrix[99,2] BRi = beta_xy(waai, wabi, aBRi, bBRi);
matrix[99,2] GPi = beta_xy(waai, wabi, aGPi, bGPi);
matrix[8,99] rdui;
vector[32] sumi_amb;
for (k in 1:99) {
// Grid runs 0.99, 0.98, ..., 0.01; RDU = lower + spread * w(p).
rdui[, k] = utili_ambl + du * wpkt(wri, (100 - k) / 100.0);
}
sumi_amb[1:8] = rdui * BRi[, 2];
sumi_amb[9:16] = rdui * BRi[, 1];
sumi_amb[17:24] = rdui * GPi[, 2];
sumi_amb[25:32] = rdui * GPi[, 1];
return u_inv(ui, phi_inv(thetai, rhoi, sumi_amb));
}

}

// Observed certainty equivalents, one column per individual (80 subjects),
// 51 observations each: 9 risky, 10 exchangeability, 32 ambiguous tasks.
// (The previous comment was leftover RStan template boilerplate.)
data {

matrix[9, 80] Yr;   // risky-prospect responses
matrix[10, 80] Ye;  // exchangeability (conditional median) responses
matrix[32, 80] Ya;  // ambiguous-prospect responses

}

// Centered hierarchical parametrization: population means (mu_*), population
// scales (sigma_*), and per-individual parameters for 80 subjects.
// 13 individual-level parameters per subject: utility curvature (u), risk
// weighting (wr), belief-beta mean/sample-size for each urn (mean/ss _BR,
// _GP), ambiguity function (theta, rho), ambiguity weighting (waa, wab),
// and three task-specific noise scales (sigma_r, sigma_e, sigma_a).
parameters {

// Mu params ----

real<lower=0> mu_u;
real<lower=0> mu_wr;
real<lower=0> mu_sigma_r;

real<lower=0, upper=1> mu_mean_BR;  // beta mean lives in (0,1)
real<lower=0> mu_ss_BR;             // beta "sample size" (concentration)
real<lower=0, upper=1> mu_mean_GP;
real<lower=0> mu_ss_GP;
real<lower=0> mu_sigma_e;

real mu_theta;                      // NOTE(review): unconstrained, but phi()/phi_inv() need theta > 0
real<lower=0> mu_rho;
real<lower=0> mu_waa;
real<lower=0> mu_wab;
real<lower=0> mu_sigma_a;

// Sigma params ----

real<lower=0> sigma_u;
real<lower=0> sigma_wr;
real<lower=0> sigma_sigma_r;

real<lower=0, upper=0.5> sigma_mean_BR;
real<lower=0> sigma_ss_BR;
real<lower=0, upper=0.5> sigma_mean_GP;
real<lower=0> sigma_ss_GP;
real<lower=0> sigma_sigma_e;

real<lower=0> sigma_theta;
real<lower=0> sigma_rho;
real<lower=0> sigma_waa;
real<lower=0> sigma_wab;
real<lower=0> sigma_sigma_a;

// Individual level params ----

vector<lower=0>[80] u;
vector<lower=0>[80] wr;
vector<lower=0>[80] sigma_r;

vector<lower=0, upper=1>[80] mean_BR;
vector<lower=0>[80] ss_BR;
vector<lower=0, upper=1>[80] mean_GP;
vector<lower=0>[80] ss_GP;
vector<lower=0>[80] sigma_e;

vector[80] theta;                   // NOTE(review): see mu_theta — negative draws break phi()
vector<lower=0>[80] rho;
vector<lower=0>[80] waa;
vector<lower=0>[80] wab;
vector<lower=0>[80] sigma_a;

}

// Convert each individual's (mean, sample-size) beta parametrization to the
// standard (alpha, beta) shapes: a = mean*ss, b = ss - a.
// Vectorized with element-wise products instead of the original
// 80-iteration loop — same values, less autodiff bookkeeping.
transformed parameters {

vector[80] aBR = mean_BR .* ss_BR;
vector[80] bBR = ss_BR - aBR;
vector[80] aGP = mean_GP .* ss_GP;
vector[80] bGP = ss_GP - aGP;

}

// The model to be estimated.
// Centered hierarchy: individual parameters ~ normal(mu, sigma); the three
// model_* functions map each subject's parameters to predicted certainty
// equivalents, compared to the observed columns with normal noise.
// Note: parameters declared <lower=0> with normal priors are implicitly
// half-normal; the unnormalized truncation constant is fine for sampling.
model {
// priors
{

// mu ----
{

mu_u ~ normal(0.7, 0.2);
mu_wr ~ normal(0.7, 0.2);
mu_sigma_r ~ normal(10, 10);

mu_mean_BR ~ normal(0.7, 0.2);
mu_ss_BR ~ normal(10, 10);
mu_mean_GP ~ normal(0.7, 0.2);
mu_ss_GP ~ normal(10, 10);
mu_sigma_e ~ normal(0.05, 0.05);

mu_theta ~ normal(10, 20);
mu_rho ~ normal(5, 10);
mu_waa ~ normal(0.7, 0.5);
mu_wab ~ normal(1.5, 1);
mu_sigma_a ~ normal(10, 20);

}

// sigma ----
{

sigma_u ~ normal(0.2, 0.1);
sigma_wr ~ normal(0.2, 0.1);
sigma_sigma_r ~ normal(5, 5);

sigma_mean_BR ~ normal(0.1, 0.1);
sigma_ss_BR ~ normal(5, 5);
sigma_mean_GP ~ normal(0.1, 0.1);
sigma_ss_GP ~ normal(5, 5);
sigma_sigma_e ~ normal(0.01, 0.01);

sigma_theta ~ normal(10, 20);
sigma_rho ~ normal(5, 10);
sigma_waa ~ normal(0.25, 0.5);
sigma_wab ~ normal(0.5, 1);
sigma_sigma_a ~ normal(5, 10);

}

// individual level (centered parametrization; see post for rationale) ----
{

u ~ normal(mu_u, sigma_u);
wr ~ normal(mu_wr, sigma_wr);
sigma_r ~ normal(mu_sigma_r, sigma_sigma_r);

mean_BR ~ normal(mu_mean_BR, sigma_mean_BR);
ss_BR ~ normal(mu_ss_BR, sigma_ss_BR);
mean_GP ~ normal(mu_mean_GP, sigma_mean_GP);
ss_GP ~ normal(mu_ss_GP, sigma_ss_GP);
sigma_e ~ normal(mu_sigma_e, sigma_sigma_e);

theta ~ normal(mu_theta,sigma_theta);
rho ~ normal(mu_rho,sigma_rho);
waa ~ normal(mu_waa, sigma_waa);
wab ~ normal(mu_wab, sigma_wab);
sigma_a ~ normal(mu_sigma_a, sigma_sigma_a);

}

}

// likelihood: one vectorized normal per task block per subject.
// The model_* calls dominate run time (inc_beta/inv_inc_beta inside).
for (i in 1:80){
vector[9] modelr = model_risky(u[i],wr[i]);
vector[10] modele = model_exch(aBR[i],bBR[i],aGP[i],bGP[i]);
vector[32] modela = model_ambi(u[i],wr[i],theta[i],rho[i],waa[i],wab[i],aBR[i],bBR[i],aGP[i],bGP[i]);
Yr[  , i ] ~ normal(modelr, sigma_r[i]);
Ye[  , i ] ~ normal(modele, sigma_e[i]);
Ya[  , i ] ~ normal(modela, sigma_a[i]);
}

}

// Population-level beta shapes implied by the (mean, sample-size) means,
// plus pointwise log-likelihoods for LOO/PSIS: 51 observations per subject,
// ordered risky (9), exchangeability (10), ambiguity (32).
generated quantities {

real mu_aBR = mu_mean_BR * mu_ss_BR;
real mu_bBR = mu_ss_BR - mu_aBR;
real mu_aGP = mu_mean_GP * mu_ss_GP;
real mu_bGP = mu_ss_GP - mu_aGP;
vector[4080] log_lik;
for (i in 1:80){
vector[9] ce_r = model_risky(u[i], wr[i]);
vector[10] ce_e = model_exch(aBR[i], bBR[i], aGP[i], bGP[i]);
vector[32] ce_a = model_ambi(u[i], wr[i], theta[i], rho[i], waa[i], wab[i], aBR[i], bBR[i], aGP[i], bGP[i]);
int pos = 51 * (i - 1);  // single base offset replaces three index variables
for (j in 1:9) {
log_lik[pos + j] = normal_lpdf(Yr[ j , i ] | ce_r[j], sigma_r[i]);
}
for (j in 1:10) {
log_lik[pos + 9 + j] = normal_lpdf(Ye[ j , i ] | ce_e[j], sigma_e[i]);
}
for (j in 1:32) {
log_lik[pos + 19 + j] = normal_lpdf(Ya[ j , i ] | ce_a[j], sigma_a[i]);
}
}

}

``````

I don’t have time to dive deeply into this at the moment but is it possible to omit the `rep_*`? These are unnecessary if you pass values into the vector and matrix later.