Hello,
I’m hoping for your advice about a multivariate probit model I have to fit; I am sure it contains mistakes because of my limited experience.
I’m trying to predict purchases in 16 categories, using as covariates some characteristics of the customers, ownership of the card of the store that supplied the data, and the same customers’ purchases in the same categories during the previous month. To do this, I am trying to adapt the approach of this paper (https://doi.org/10.1509/jmkr.42.2.233.62288).
I would be very pleased if you could give me some advice about:
- How can I simulate this kind of data? I’m not sure whether this model fits the data, so I want to recover known parameters first (I know this is not a Stan issue, but I need some guidance).
- With the current model I get the "maximum treedepth exceeded" warning. I read that a non-centered parameterization could help; how could I apply it in this case?
- I’m not sure whether I have to include a Jacobian adjustment.
- Any way to speed things up, because the estimation is very slow.
I would be very grateful if someone could give me some advice on how best to approach this problem. I know there are many issues in this post and I don’t want you to do my homework, but any help would be appreciated.
The model looks as follows (I also attach the data and Stan code; I’m using RStan):
Multivariate Probit v1.stan (2.8 KB)
Multivariate Probit v1.R (966 Bytes)
sample data.csv (562.5 KB)
functions {
  // Returns the total of all entries of a two-dimensional integer array.
  // Applied to a 0/1 array this is the number of ones it contains.
  int sum(int[,] a) {
    int total;
    total = 0;
    for (r in 1:size(a)) {
      for (c in 1:size(a[r])) {
        total = total + a[r, c];
      }
    }
    return total;
  }
}
data {
  int<lower=1> NCAT;                   // number of categories
  int<lower=0> NDAT;                   // number of observations
  int<lower=1> NCHAR;                  // number of customer-level characteristics
  int<lower=0,upper=1> y[NDAT,NCAT];   // purchase indicators to be predicted
  vector[NCAT] x[NDAT];                // purchases lagged one period
  vector[NCHAR] x0[NDAT];              // customer demographic characteristics
  row_vector[NDAT] t;                  // card-owner indicator, one entry per observation
}
// MULTIVARIATE PROBIT TRANSFORMATION FROM USER GUIDE
transformed data {
  // (observation, category) index pairs, split by the value of y:
  // n_pos/d_pos list the cells where y == 1, n_neg/d_neg the remaining cells.
  int<lower=0> N_pos;
  int<lower=1,upper=NDAT> n_pos[sum(y)];
  int<lower=1,upper=NCAT> d_pos[size(n_pos)];
  int<lower=0> N_neg;
  int<lower=1,upper=NDAT> n_neg[(NDAT * NCAT) - size(n_pos)];
  int<lower=1,upper=NCAT> d_neg[size(n_neg)];

  N_pos = size(n_pos);
  N_neg = size(n_neg);
  {
    // Next free slot in the positive / negative index arrays.
    int next_pos;
    int next_neg;
    next_pos = 1;
    next_neg = 1;
    for (n in 1:NDAT) {
      for (d in 1:NCAT) {
        // y is declared with bounds [0, 1], so "not 0" means 1.
        if (y[n, d] == 0) {
          n_neg[next_neg] = n;
          d_neg[next_neg] = d;
          next_neg = next_neg + 1;
        } else {
          n_pos[next_pos] = n;
          d_pos[next_pos] = d;
          next_pos = next_pos + 1;
        }
      }
    }
  }
}
parameters {
  vector[NCAT-1] beta_free;            // free elements of beta (beta[1] is fixed)
  vector[NCAT] teta;                   // card-ownership coefficient per category
  vector[NDAT] gama;                   // individual-level (hierarchical) parameters
  vector<lower=0>[NDAT] tau;           // scale used in the prior on gama
  vector[NCHAR] delta;                 // coefficients on x0 in the prior mean of gama
  cholesky_factor_corr[NCAT] L_Omega;  // Cholesky factor of the latent correlation matrix
  vector<lower=0>[N_pos] z_pos;        // latent values for cells with y == 1
  vector<upper=0>[N_neg] z_neg;        // latent values for cells with y == 0
}
transformed parameters {
  vector[NCAT] beta;            // beta_free with a leading element fixed to 1
  vector[NDAT] dm;              // maturity: dot product of beta with lagged purchases
  vector[NCAT] utilidad[NDAT];  // mean of the latent utility per observation and category
  vector[NCAT] z[NDAT];         // latent utilities assembled from z_pos and z_neg

  // The first element is fixed; the rest are copied from beta_free.
  beta[1] = 1;
  for (k in 2:NCAT) {
    beta[k] = beta_free[k - 1];
  }

  // Maturity and latent-utility mean for every observation.
  for (n in 1:NDAT) {
    dm[n] = dot_product(beta, x[n]);
    for (d in 1:NCAT) {
      utilidad[n, d] = gama[n] * fabs(beta[d] - dm[n]) + teta[d] * t[n];
    }
  }

  // Place the sign-constrained latent values into their (observation, category) cells.
  for (k in 1:N_pos) {
    z[n_pos[k], d_pos[k]] = z_pos[k];
  }
  for (k in 1:N_neg) {
    z[n_neg[k], d_neg[k]] = z_neg[k];
  }
}
model {
  // Priors on the correlation structure and the regression-type parameters.
  L_Omega ~ lkj_corr_cholesky(4);
  beta_free ~ normal(0, 5);
  teta ~ normal(0, 5);
  // NOTE(review): gama also receives the customer-level prior below, so this
  // statement puts a second prior on it -- confirm that this is intended.
  gama ~ normal(0, 5);
  delta ~ normal(0, 5);
  tau ~ cauchy(0, 2.5);

  // Customer-level prior: the mean of gama[n] depends on the customer's
  // characteristics x0[n]. The scale is tau[n]; passing the whole vector tau
  // here would add one density term per element of tau for every customer.
  for (n in 1:NDAT) {
    gama[n] ~ normal(dot_product(delta, x0[n]), tau[n]);
  }

  // Likelihood of the latent utilities. z is filled by plain assignment from
  // the parameters z_pos and z_neg, so no Jacobian adjustment is required.
  z ~ multi_normal_cholesky(utilidad, L_Omega);
}
generated quantities {
  // Correlation matrix recovered from its Cholesky factor: L_Omega * L_Omega'.
  corr_matrix[NCAT] Omega = multiply_lower_tri_self_transpose(L_Omega);
}