Hi everyone,
I try to fit a multivariate mixed model with latent variables. I regress the three dependent variables AdAwa, BI and Con on a set of seven latent stock variables and some regression specific control variables for I brands and T calender weeks.
(1) AdAwa_{it}=\alpha_i + Stock_{it} \beta^{Stock}_i + CV^{AdAwa}_{it} \beta^{CV_{AdAwa}}_i + \epsilon^{AdAwa}
(2) BI_{it}=\alpha_i + Stock_{it} \beta^{Stock}_i + CV^{BI}_{it} \beta^{CV_{BI}}_i + \epsilon^{BI}
(3) Con_{it}=\alpha_i + Stock_{it} \beta^{Stock}_i + CV^{Con}_{it} \beta^{CV_{Con}}_i + \epsilon^{Con}
with
(4) Stock_{it} = \lambda Stock_{it-1} + (1-\lambda) Flow_{it} ,
(5) Stock_{it=1} = Flow_{it=1} and
(6) 0\leq \lambda \leq 0.99
I allow for a correlation between the three error terms \epsilon^{AdAwa}, \epsilon^{BI} and \epsilon^{Con}. Further I assume for each equation that both \beta vectors for the stock and equation specific control variables are coming from one multivariate normal distribution.
Our data set consists of 20 brands and 200 weeks per brand.
data {
int<lower=0> N; //number of observations
int T; //time points
int<lower=0> N_id; //number of cross-sections
int<lower=0> id[N]; //id of cross-sections
int<lower=0> S; //number of stock variables
int<lower=0> A; //number of coefficients for AdAwa
int<lower=0> B; //number of coefficients for BI
int<lower=0> C; //number of coefficients for Con
int<lower=0> SA; //number of coefficients for Stock + CV for AdAwa
int<lower=0> SB; //number of coefficients for Stock + CV for BI
int<lower=0> SC; //number of coefficients for Stock + CV for Con
matrix[T, N_id] CV_TV_flow_matrix; // flow variable for 1. stock variable
matrix[T, N_id] CV_Print_flow_matrix;// flow variable for 2. stock variable
matrix[T, N_id] CV_Digital_flow_matrix;// flow variable for 3. stock variable
matrix[T, N_id] CV_Radio_flow_matrix;// flow variable for 4. stock variable
matrix[T, N_id] CV_Kino_flow_matrix;// flow variable for 5. stock variable
matrix[T, N_id] CV_OOH_flow_matrix;// flow variable for 6. stock variable
matrix[T, N_id] CV_DirectMail_flow_matrix;// flow variable for 7. stock variable
matrix[N, A] X_AdAwa; //CV predictor matrix AdAwa;
matrix[N, B] X_BI; //CV predictor matrix BI;
matrix[N, C] X_Con; //CV predictor matrix Con;
vector[N] AdAwa; //outcome values AdAwa
vector[N] BI; //outcome values BI
vector[N] Con; //outcome values Con
}
transformed data {
vector[3] Y[N];
vector[N] CV_TV_flow = to_vector(CV_TV_flow_matrix);
vector[N] CV_Print_flow = to_vector(CV_Print_flow_matrix);
vector[N] CV_Digital_flow = to_vector(CV_Digital_flow_matrix);
vector[N] CV_Radio_flow = to_vector(CV_Radio_flow_matrix);
vector[N] CV_Kino_flow = to_vector(CV_Kino_flow_matrix);
vector[N] CV_OOH_flow = to_vector(CV_OOH_flow_matrix);
vector[N] CV_DirectMail_flow = to_vector(CV_DirectMail_flow_matrix);
int NC_AdAwa = (SA*(SA-1))/2; //number of correlations for random coefficients in AdAwa
int NC_BI = (SB*(SB-1))/2; //number of correlations for random coefficients in BI
int NC_Con = (SC*(SC-1))/2; //number of correlations for random coefficients in Con
for (n in 1:N) {
Y[n] = [AdAwa[n], BI[n], Con[n]]';
}
}
parameters {
vector[A] Beta_0_AdAwa_raw;
vector[S] Beta_0_AdAwa_stock_raw;
vector[B] Beta_0_BI_raw;
vector[S] Beta_0_BI_stock_raw;
vector[C] Beta_0_Con_raw;
vector[S] Beta_0_Con_stock_raw;
real Alpha_0_AdAwa_raw;
real Alpha_0_BI_raw;
real Alpha_0_Con_raw;
real <lower=0, upper=0.99> lambda_TV;
real <lower=0, upper=0.99> lambda_Print;
real <lower=0, upper=0.99> lambda_Digital;
real <lower=0, upper=0.99> lambda_Radio;
real <lower=0, upper=0.99> lambda_Kino;
real <lower=0, upper=0.99> lambda_OOH;
real <lower=0, upper=0.99> lambda_DirectMail;
real<lower=0> sd_Alpha_AdAwa_a; // group-level standard deviations
real<lower=0> sd_Alpha_AdAwa_b; // group-level standard deviations
vector[N_id] z_Alpha_AdAwa; // standardized group-level effects
real<lower=0> sd_Alpha_BI_a; // group-level standard deviations
real<lower=0> sd_Alpha_BI_b; // group-level standard deviations
vector[N_id] z_Alpha_BI; // standardized group-level effects
real<lower=0> sd_Alpha_Con_a; // group-level standard deviations
real<lower=0> sd_Alpha_Con_b; // group-level standard deviations
vector[N_id] z_Alpha_Con; // standardized group-level effects
vector<lower=0>[SA] sd_Beta_AdAwa_a; // group-level standard deviations
vector<lower=0>[SA] sd_Beta_AdAwa_b; // group-level standard deviations
matrix[SA, N_id] z_Beta_AdAwa; // standardized group-level effects
// cholesky factor of correlation matrix
cholesky_factor_corr[SA] L_Beta_AdAwa;
vector<lower=0>[SB] sd_Beta_BI_a; // group-level standard deviations
vector<lower=0>[SB] sd_Beta_BI_b; // group-level standard deviations
matrix[SB, N_id] z_Beta_BI; // standardized group-level effects
// cholesky factor of correlation matrix
cholesky_factor_corr[SB] L_Beta_BI;
vector<lower=0>[SC] sd_Beta_Con_a; // group-level standard deviations
vector<lower=0>[SC] sd_Beta_Con_b; // group-level standard deviations
matrix[SC, N_id] z_Beta_Con; // standardized group-level effects
// cholesky factor of correlation matrix
cholesky_factor_corr[SC] L_Beta_Con;
vector <lower=0> [3] sigma_a;
vector <lower=0> [3] sigma_b;
cholesky_factor_corr[3] L_Omega;
}
transformed parameters {
matrix[T, N_id] CV_TV_stock_matrix;
matrix[T, N_id] CV_Print_stock_matrix;
matrix[T, N_id] CV_Digital_stock_matrix;
matrix[T, N_id] CV_Radio_stock_matrix;
matrix[T, N_id] CV_Kino_stock_matrix;
matrix[T, N_id] CV_OOH_stock_matrix;
matrix[T, N_id] CV_DirectMail_stock_matrix;
vector[N] CV_TV_stock;
vector[N] CV_Print_stock;
vector[N] CV_Digital_stock;
vector[N] CV_Radio_stock;
vector[N] CV_Kino_stock;
vector[N] CV_OOH_stock;
vector[N] CV_DirectMail_stock;
matrix[N, S] AdStock;
vector[A] Beta_0_AdAwa = 2 * Beta_0_AdAwa_raw; // implies ~normal(0,2)
vector[S] Beta_0_AdAwa_stock = 2 * Beta_0_AdAwa_stock_raw; // implies ~normal(0,2)
vector[B] Beta_0_BI = 2 * Beta_0_BI_raw; // implies ~normal(0,2)
vector[S] Beta_0_BI_stock = 2 *Beta_0_BI_stock_raw; // implies ~normal(0,2)
vector[C] Beta_0_Con = 2 * Beta_0_Con_raw; // implies ~normal(0,2)
vector[S] Beta_0_Con_stock = 2 * Beta_0_Con_stock_raw; // implies ~normal(0,2)
real Alpha_0_AdAwa = 5 * Alpha_0_AdAwa_raw; // implies ~normal(0,5)
real Alpha_0_BI = 5 * Alpha_0_BI_raw; // implies ~normal(0,5)
real Alpha_0_Con = 5 * Alpha_0_Con_raw; // implies ~normal(0,5)
real <lower=0> sd_Alpha_AdAwa = sd_Alpha_AdAwa_a .* sqrt(sd_Alpha_AdAwa_b);// implies ~cauchy(0,1)
real <lower=0> sd_Alpha_BI = sd_Alpha_BI_a .* sqrt(sd_Alpha_BI_b);// implies ~cauchy(0,1)
real <lower=0> sd_Alpha_Con = sd_Alpha_Con_a .* sqrt(sd_Alpha_Con_b);// implies ~cauchy(0,1)
vector<lower=0>[SA] sd_Beta_AdAwa = sd_Beta_AdAwa_a .* sqrt(sd_Beta_AdAwa_b);// implies ~cauchy(0,1)
vector<lower=0>[SB] sd_Beta_BI = sd_Beta_BI_a .* sqrt(sd_Beta_BI_b);// implies ~cauchy(0,1)
vector<lower=0>[SC] sd_Beta_Con = sd_Beta_Con_a .* sqrt(sd_Beta_Con_b);// implies ~cauchy(0,1)
vector <lower=0> [3] sigma = sigma_a .* sqrt(sigma_b);// implies ~cauchy(0,1)
vector[N_id] Alpha_i_AdAwa = sd_Alpha_AdAwa * z_Alpha_AdAwa;
// actual group-level effects
matrix[N_id, SA] Beta_i_AdAwa = (diag_pre_multiply(sd_Beta_AdAwa, L_Beta_AdAwa) * z_Beta_AdAwa)';
vector[N_id] Alpha_i_BI = sd_Alpha_BI * z_Alpha_BI;
// actual group-level effects
matrix[N_id, SB] Beta_i_BI = (diag_pre_multiply(sd_Beta_BI, L_Beta_BI) * z_Beta_BI)';
vector[N_id] Alpha_i_Con = sd_Alpha_Con * z_Alpha_Con;
// actual group-level effects
matrix[N_id, SC] Beta_i_Con = (diag_pre_multiply(sd_Beta_Con, L_Beta_Con) * z_Beta_Con)';
// state equation TV
for (i in 1:N_id){
CV_TV_stock_matrix[1,i] = CV_TV_flow_matrix[1, i];
for(t in 2:T) {
CV_TV_stock_matrix[t, i] = lambda_TV * CV_TV_stock_matrix[t-1, i] + (1 - lambda_TV) * CV_TV_flow_matrix[t, i];
}
}
CV_TV_stock = to_vector(CV_TV_stock_matrix);
// state equation Print
for (i in 1:N_id){
CV_Print_stock_matrix[1,i] = CV_Print_flow_matrix[1, i];
for(t in 2:T) {
CV_Print_stock_matrix[t, i] = lambda_Print * CV_Print_stock_matrix[t-1, i] + (1 - lambda_Print) * CV_Print_flow_matrix[t, i];
}
}
CV_Print_stock = to_vector(CV_Print_stock_matrix);
// state equation Digital
for (i in 1:N_id){
CV_Digital_stock_matrix[1,i] = CV_Digital_flow_matrix[1, i];
for(t in 2:T) {
CV_Digital_stock_matrix[t, i] = lambda_Digital * CV_Digital_stock_matrix[t-1, i] + (1 - lambda_Digital) * CV_Digital_flow_matrix[t, i];
}
}
CV_Digital_stock = to_vector(CV_Digital_stock_matrix);
// state equation Radio
for (i in 1:N_id){
CV_Radio_stock_matrix[1,i] = CV_Radio_flow_matrix[1, i];
for(t in 2:T) {
CV_Radio_stock_matrix[t, i] = lambda_Radio * CV_Radio_stock_matrix[t-1, i] + (1 - lambda_Radio) * CV_Radio_flow_matrix[t, i];
}
}
CV_Radio_stock = to_vector(CV_Radio_stock_matrix);
// state equation Kino
for (i in 1:N_id){
CV_Kino_stock_matrix[1,i] = CV_Kino_flow_matrix[1, i];
for(t in 2:T) {
CV_Kino_stock_matrix[t, i] = lambda_Kino * CV_Kino_stock_matrix[t-1, i] + (1 - lambda_Kino) * CV_Kino_flow_matrix[t, i];
}
}
CV_Kino_stock = to_vector(CV_Kino_stock_matrix);
// state equation OOH
for (i in 1:N_id){
CV_OOH_stock_matrix[1,i] = CV_OOH_flow_matrix[1, i];
for(t in 2:T) {
CV_OOH_stock_matrix[t, i] = lambda_OOH * CV_OOH_stock_matrix[t-1, i] + (1 - lambda_OOH) * CV_OOH_flow_matrix[t, i];
}
}
CV_OOH_stock = to_vector(CV_OOH_stock_matrix);
// state equation DirectMail
for (i in 1:N_id){
CV_DirectMail_stock_matrix[1,i] = CV_DirectMail_flow_matrix[1, i];
for(t in 2:T) {
CV_DirectMail_stock_matrix[t, i] = lambda_DirectMail * CV_DirectMail_stock_matrix[t-1, i] + (1 - lambda_DirectMail) * CV_DirectMail_flow_matrix[t, i];
}
}
CV_DirectMail_stock = to_vector(CV_DirectMail_stock_matrix);
AdStock[:,1] = CV_TV_stock;
AdStock[:,2] = CV_Print_stock;
AdStock[:,3] = CV_Digital_stock;
AdStock[:,4] = CV_Radio_stock;
AdStock[:,5] = CV_Kino_stock;
AdStock[:,6] = CV_OOH_stock;
AdStock[:,7] = CV_DirectMail_stock;
}
model {
//define parameters
vector[N] mu_AdAwa = Alpha_0_AdAwa + AdStock * Beta_0_AdAwa_stock + X_AdAwa*Beta_0_AdAwa + rows_dot_product(rep_vector(1, N), Alpha_i_AdAwa[id]) + rows_dot_product(append_col(AdStock, X_AdAwa), Beta_i_AdAwa[id]);
vector[N] mu_BI = Alpha_0_BI + AdStock * Beta_0_BI_stock + X_BI*Beta_0_BI + rows_dot_product(rep_vector(1, N), Alpha_i_BI[id]) + rows_dot_product(append_col(AdStock, X_BI), Beta_i_BI[id]);
vector[N] mu_Con = Alpha_0_Con + AdStock * Beta_0_Con_stock + X_Con*Beta_0_Con + rows_dot_product(rep_vector(1, N), Alpha_i_Con[id]) + rows_dot_product(append_col(AdStock, X_Con), Beta_i_Con[id]);
matrix[3, 3] LSigma = diag_pre_multiply(sigma, L_Omega);
vector[3] mu[N];
for (n in 1:N) {
mu[n] = [mu_AdAwa[n], mu_BI[n], mu_Con[n]]';
}
//priors
//coefficients
Alpha_0_AdAwa_raw ~ normal(0, 1);
Alpha_0_BI_raw ~ normal(0, 1);
Alpha_0_Con_raw ~ normal(0, 1);
Beta_0_AdAwa_raw ~ normal(0, 1);
Beta_0_AdAwa_stock_raw ~ normal(0, 1);
Beta_0_BI_raw ~ normal(0, 1);
Beta_0_BI_stock_raw ~ normal(0, 1);
Beta_0_Con_raw ~ normal(0, 1);
Beta_0_Con_stock_raw ~ normal(0, 1);
z_Alpha_AdAwa ~ normal(0, 1);
z_Alpha_BI ~ normal(0, 1);
z_Alpha_Con ~ normal(0, 1);
to_vector(z_Beta_AdAwa) ~ normal(0, 1);
to_vector(z_Beta_BI) ~ normal(0, 1);
to_vector(z_Beta_Con) ~ normal(0, 1);
//sigmas
sd_Alpha_AdAwa_a ~ normal(0,1);
sd_Alpha_AdAwa_b ~ inv_gamma(0.5, 0.5);
sd_Alpha_BI_a ~ normal(0,1);
sd_Alpha_BI_b ~ inv_gamma(0.5, 0.5);
sd_Alpha_Con_a ~ normal(0,1);
sd_Alpha_Con_b ~ inv_gamma(0.5, 0.5);
sd_Beta_AdAwa_a ~ normal(0,1);
sd_Beta_AdAwa_b ~ inv_gamma(0.5, 0.5);
L_Beta_AdAwa ~ lkj_corr_cholesky(2);
sd_Beta_BI_a ~ normal(0,1);
sd_Beta_BI_b ~ inv_gamma(0.5, 0.5);
L_Beta_BI ~ lkj_corr_cholesky(2);
sd_Beta_Con_a ~ normal(0,1);
sd_Beta_Con_b ~ inv_gamma(0.5, 0.5);
L_Beta_Con ~ lkj_corr_cholesky(2);
sigma_a ~ normal(0,1);
sigma_b ~ inv_gamma(0.5, 0.5);
L_Omega ~ lkj_corr_cholesky(2);
//likelihood
Y ~ multi_normal_cholesky(mu, LSigma);
}
generated quantities {
vector[N_id] Alpha_AdAwa;
vector[N_id] Alpha_BI;
vector[N_id] Alpha_Con;
matrix[N_id, SA] Beta_AdAwa;
matrix[N_id, SB] Beta_BI;
matrix[N_id, SC] Beta_Con;
corr_matrix[SA] Cor_Beta_AdAwa;
vector<lower=-1,upper=1>[NC_AdAwa] cor_Beta_AdAwa;
corr_matrix[SB] Cor_Beta_BI;
vector<lower=-1,upper=1>[NC_BI] cor_Beta_BI;
corr_matrix[SC] Cor_Beta_Con;
vector<lower=-1,upper=1>[NC_Con] cor_Beta_Con;
corr_matrix[3] Rescor = multiply_lower_tri_self_transpose(L_Omega);
vector<lower=-1,upper=1>[3] rescor;
for(i in 1:N_id) {
Alpha_AdAwa[i] = Alpha_0_AdAwa + Alpha_i_AdAwa[i];
Beta_AdAwa[i, ] = append_col(to_row_vector(Beta_0_AdAwa_stock),to_row_vector(Beta_0_AdAwa)) + Beta_i_AdAwa[i, ];
Alpha_BI[i] = Alpha_0_BI + Alpha_i_BI[i];
Beta_BI[i, ] = append_col(to_row_vector(Beta_0_BI_stock),to_row_vector(Beta_0_BI)) + Beta_i_BI[i, ];
Alpha_Con[i] = Alpha_0_Con + Alpha_i_Con[i];
Beta_Con[i, ] = append_col(to_row_vector(Beta_0_Con_stock),to_row_vector(Beta_0_Con)) + Beta_i_Con[i, ];
}
// group-level correlations
Cor_Beta_AdAwa = multiply_lower_tri_self_transpose(L_Beta_AdAwa);
// extract upper diagonal of correlation matrix
for (l in 1:SA) {
for (j in 1:(l - 1)) {
cor_Beta_AdAwa[choose(l - 1, 2) + j] = Cor_Beta_AdAwa[j, l];
}
}
Cor_Beta_BI = multiply_lower_tri_self_transpose(L_Beta_BI);
for (l in 1:SB) {
for (j in 1:(l - 1)) {
cor_Beta_BI[choose(l - 1, 2) + j] = Cor_Beta_BI[j, l];
}
}
Cor_Beta_Con = multiply_lower_tri_self_transpose(L_Beta_Con);
for (l in 1:SC) {
for (j in 1:(l - 1)) {
cor_Beta_Con[choose(l - 1, 2) + j] = Cor_Beta_Con[j, l];
}
}
for (k in 1:3) {
for (j in 1:(k - 1)) {
rescor[choose(k - 1, 2) + j] = Rescor[j, k];
}
}
}
I have already run similar smaller models for some toy data sets and the model performance was good. But for this model and my real data, the performane is awful. The model currently runs for 16 hours and has not even finished the first 200 iterations. Is there anything in the code I could try to speed up the model?
Thanks!