Hello, I’m trying to fit a two component multivariate normal mixture model to some multivariate data which is well separated into two multivariate normal looking distributions but I keep getting results that collapse the posterior distribution into one component only. I was wondering whether you can tell this is because there’s an error in my Stan code, or whether this is because it’s inherently very difficult to fit Bayesian multivariate mixture models.
Am I right in thinking the posterior likelihood doesn’t take into account the extremely low probability of your data not having any observations from the proposed posterior mean or mode - so if your data if split between either side of this mean or mode, it builds evidence for the posterior mean or mode to be in the middle (rather than giving me the two separate components I am hoping to uncover)?
data {
int<lower=1> K; // number of mixture components
int<lower=1> J; // number of individual profiles
int<lower=1> D; // dimensions of parameter vector
vector[D] beta_j[J]; // parameter vectors for each individual profile
vector[D] mu0_1; //prior on mu vector for component 1
vector[D] mu0_2; //prior on mu vector for component 2
matrix[D,D] sigma_mu; //prior on correlation matrix for all components
}
parameters {
simplex[K] theta; // mixture component proportions
ordered[K] beta_0; // ordering the first element of the parameter vector, between the components
vector[D-1] beta_1[K]; // remaining elements of the parameter vector
cholesky_factor_corr[D] L[K]; // mixture component Cholesky factor correlation matrices
}
transformed parameters {
vector[D] mu[K];
vector[K] lps[J];
for (k in 1:K) {
mu[k]=append_row(beta_0[k], beta_1[k]); // parameter vector
}
for (j in 1:J) {
vector[K] log_theta = log(theta); // cache log calculation
for (k in 1:K) {
lps[j, k] = log_theta[k] + multi_normal_cholesky_lpdf(beta_j[j] | mu[k], L[k]);
}
}
}
model {
for (k in 1:K) {
L[k] ~ lkj_corr_cholesky(4);
}
//priors
theta ~ beta(100,100);
mu[1] ~ multi_normal(mu0_1, sigma_mu);
mu[2] ~ multi_normal(mu0_2, sigma_mu);
for (j in 1:J) {
target += log_sum_exp(lps[j,]);
}
}
generated quantities {
simplex[K] posterior_assignment[J];
for (j in 1:J) {
posterior_assignment[j]=softmax(lps[j,]);
}
}