Hello,

I’m hoping for your advice about a Multivariate Probit i have to fit, and i know that surely is wrong for my poor experience.

I’m trying to predict purchase in 16 categories using as covariates some characteristics of the customers, card ownership of the store that supply the data and the purchases in the last month in the same categories for the same customers. Then, i try to adapt the approach of this paper (https://doi.org/10.1509/jmkr.42.2.233.62288).

Then, i will very pleased if you can give some advices about:

- How i can simulate this kind of data? Obviously, i’m not sure if this model fit the data, so i want to recover known parameters first (i know is not a stan issue, but i need some guidance)
- With the current model i have the maximum threedepth excedeed warning. I read that a non-centered parameterization could help, how i could apply it in this case ?
- I’m not sure if i have to include the Jacobian.
- Any speeding up, because the estimation is very low.

I would be very very grateful if someone can give me some advice on how to best approach this problem. Obviously, i know there are many issues in the post and i don’t want that you do my homework, but everything would be helpful.

The model looks as follows (and also i attach the data and stan code, i’m using Rstan):

Multivariate Probit v1.stan (2.8 KB)

Multivariate Probit v1.R (966 Bytes)

sample data.csv (562.5 KB)

functions {

int sum(int[,] a) {

int s;

s = 0;

for (i in 1:size(a))

for (j in 1:size(a[i]))

s = s + a[i,j];

return s;

}

}data {

int<lower=1> NCAT; // NUMBER OF CATEGORIES

int<lower=0> NDAT; // NUMBER OF OBSERVATIONS

int<lower=1> NCHAR; // NUMBER OF CUSTOMER-LEVEL CHARACTERISTICS

int<lower=0,upper=1> y[NDAT,NCAT]; // PURCHASE MATRIX FOR PREDICTION

vector[NCAT] x[NDAT]; // PURCHASE MATRIX LAG 1

vector[NCHAR] x0[NDAT]; // DEMOGRAFIC COSTUMERS MATRIX

row_vector[NDAT] t; // BINARY FOR CARD OWNER

}// MULTIVARIATE PROBIT TRANSFORMATION FROM USER GUIDE

transformed data {

int<lower=0> N_pos;

int<lower=1,upper=NDAT> n_pos[sum(y)];

int<lower=1,upper=NCAT> d_pos[size(n_pos)];

int<lower=0> N_neg;

int<lower=1,upper=NDAT> n_neg[(NDAT * NCAT) - size(n_pos)];

int<lower=1,upper=NCAT> d_neg[size(n_neg)];N_pos=size(n_pos);

N_neg=size(n_neg);

{

int i;

int j;

i=1;

j=1;

for (n in 1:NDAT) {

for (d in 1:NCAT) {

if (y[n,d] == 1) {

n_pos[i] = n;

d_pos[i] = d;

i=i + 1;

} else {

n_neg[j] = n;

d_neg[j] = d;

j = j + 1;

}

}

}

}

}parameters {

vector[NCAT-1] beta_free; // FREE PARAMETERS TO FIXED ONE

vector[NCAT] teta; // PARAMETER FOR CARD OWNERSHIP

vector[NDAT] gama; // PARAMETERS FOR INDIVIDUAL HIERARCHY

vector<lower=0>[NDAT] tau;

vector[NCHAR] delta;

cholesky_factor_corr[NCAT] L_Omega;

vector<lower=0>[N_pos] z_pos;

vector<upper=0>[N_neg] z_neg;}

transformed parameters {

vector[NCAT] beta; // TRANSFORMATION OF BETA_FREE

vector[NDAT] dm; // MATURITY

vector[NCAT] utilidad[NDAT];

vector[NCAT] z[NDAT];beta[1]=1; // FIX BETA 1

// FILL BETA VECTOR

for (i in 1:(NCAT-1))

beta[i+1] = beta_free[i];// MATURITY

for (n in 1:NDAT)

dm[n] = dot_product(beta,x[n]);// CONSTRUCT LATENT UTILITY OF EACH OBSERVATION

for (d in 1:NCAT)

for (n in 1:NDAT)

utilidad[n, d] = gama[n]*fabs(beta[d]-dm[n])+teta[d]*t[n];

;for (n in 1:N_pos)

z[n_pos[n], d_pos[n]] = z_pos[n];

for (n in 1:N_neg)

z[n_neg[n], d_neg[n]] = z_neg[n];}

model {

L_Omega ~ lkj_corr_cholesky(4);

to_vector(beta_free) ~ normal(0, 5);

to_vector(teta) ~ normal(0, 5);

to_vector(gama) ~ normal(0, 5);

to_vector(delta) ~ normal(0, 5);

to_vector(tau) ~ cauchy(0,2.5);

{`// FILL CUSTOMER-LEVEL PARAMETERS for(n in 1:NDAT) gama[n] ~ normal(dot_product(delta,x0[n]),tau); // NON-CENTERED PARAMETERIZATION? z ~ multi_normal_cholesky( utilidad , L_Omega);`

}

}

generated quantities {

corr_matrix[NCAT] Omega;

Omega = multiply_lower_tri_self_transpose(L_Omega);

}