Hello everyone. I am trying to fit a model with brms. Below is the Stan code produced by the make_stancode function. However, the model takes hours to run, and even in simpler versions of the model the effective sample size is low. The model I am trying to fit is the following:
log1p(Number of units sold) ~ 0 + (1 + log1p(Price) + log1p(Distribution) + log1p(Distribution on Display) + log1p(Distribution on Leaflet) + log1p(Distribution Leaflet + Display) | Brand)
plus the fixed effects of all of these variables.
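For context, the brms call would look roughly like the sketch below. This is only a sketch: the data frame `sales` and its column names are placeholders for my actual data, and the class-wide prior on "b" is an approximation (the generated code below applies the normal prior to individual coefficients rather than the whole class).

library(brms)

# Placeholder data frame and column names; priors taken from the generated code.
fit <- brm(
  log1p(units_sold) ~ 0 + log1p(price) + log1p(distribution) +
    log1p(distr_display) + log1p(distr_leaflet) + log1p(distr_leaflet_display) +
    (1 + log1p(price) + log1p(distribution) + log1p(distr_display) +
       log1p(distr_leaflet) + log1p(distr_leaflet_display) | brand),
  data = sales,
  prior = c(
    set_prior("normal(0.5, 0.5)", class = "b"),
    set_prior("inv_gamma(0.5, 0.5)", class = "sd")
  )
)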
functions {
}
data {
  int<lower=1> N;  // total number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  // data for group-level effects of ID 1
  int<lower=1> J_1[N];  // grouping indicator per observation
  int<lower=1> N_1;  // number of grouping levels
  int<lower=1> M_1;  // number of coefficients per level
  vector[N] Z_1_1;  // group-level predictor values
  vector[N] Z_1_2;
  vector[N] Z_1_3;
  vector[N] Z_1_4;
  vector[N] Z_1_5;
  vector[N] Z_1_6;
  int<lower=1> NC_1;  // number of group-level correlations
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
}
parameters {
  vector[K] b;  // population-level effects
  real<lower=0> sigma;  // residual SD
  vector<lower=0>[M_1] sd_1;  // group-level standard deviations
  matrix[M_1, N_1] z_1;  // unscaled group-level effects
  cholesky_factor_corr[M_1] L_1;  // cholesky factor of correlation matrix
}
transformed parameters {
  // group-level effects
  matrix[N_1, M_1] r_1 = (diag_pre_multiply(sd_1, L_1) * z_1)';
  vector[N_1] r_1_1 = r_1[, 1];
  vector[N_1] r_1_2 = r_1[, 2];
  vector[N_1] r_1_3 = r_1[, 3];
  vector[N_1] r_1_4 = r_1[, 4];
  vector[N_1] r_1_5 = r_1[, 5];
  vector[N_1] r_1_6 = r_1[, 6];
}
model {
  vector[N] mu = X * b;
  for (n in 1:N) {
    mu[n] += r_1_1[J_1[n]] * Z_1_1[n] + r_1_2[J_1[n]] * Z_1_2[n]
             + r_1_3[J_1[n]] * Z_1_3[n] + r_1_4[J_1[n]] * Z_1_4[n]
             + r_1_5[J_1[n]] * Z_1_5[n] + r_1_6[J_1[n]] * Z_1_6[n];
  }
  // priors including all constants
  target += normal_lpdf(b[1] | 0.5, 0.5);
  target += normal_lpdf(b[3] | 0.5, 0.5);
  target += normal_lpdf(b[4] | 0.5, 0.5);
  target += normal_lpdf(b[5] | 0.5, 0.5);
  target += student_t_lpdf(sigma | 3, 0, 10)
            - 1 * student_t_lccdf(0 | 3, 0, 10);
  target += inv_gamma_lpdf(sd_1 | 0.5, 0.5);
  target += normal_lpdf(to_vector(z_1) | 0, 1);
  target += lkj_corr_cholesky_lpdf(L_1 | 1);
  // likelihood including all constants
  if (!prior_only) {
    target += normal_lpdf(Y | mu, sigma);
  }
}
generated quantities {
  corr_matrix[M_1] Cor_1 = multiply_lower_tri_self_transpose(L_1);
  vector<lower=-1,upper=1>[NC_1] cor_1;
  // take only relevant parts of correlation matrix
  cor_1[1] = Cor_1[1, 2];
  cor_1[2] = Cor_1[1, 3];
  cor_1[3] = Cor_1[2, 3];
  cor_1[4] = Cor_1[1, 4];
  cor_1[5] = Cor_1[2, 4];
  cor_1[6] = Cor_1[3, 4];
  cor_1[7] = Cor_1[1, 5];
  cor_1[8] = Cor_1[2, 5];
  cor_1[9] = Cor_1[3, 5];
  cor_1[10] = Cor_1[4, 5];
  cor_1[11] = Cor_1[1, 6];
  cor_1[12] = Cor_1[2, 6];
  cor_1[13] = Cor_1[3, 6];
  cor_1[14] = Cor_1[4, 6];
  cor_1[15] = Cor_1[5, 6];
}
You can find a subset of the dataset here:
https://expirebox.com/download/ec988977cd40671e7916aed7e02e1b5a.html