Hi everyone, I’m new to Stan and mixture models.
I’m fitting a polytomous cognitive diagnosis model (DINA-like) in which the latent attribute mastery patterns are explicitly enumerated as \(C = 2^K\) latent classes, and the likelihood is a mixture over those classes computed with `log_sum_exp`.
I tested MAP estimation with `optimize()`, and the MAP estimates differ a lot from the posterior estimates obtained by MCMC.
Below is the Stan program:
data {
  int<lower=1> I; // # examinees
  int<lower=1> J; // # items
  int<lower=1> K; // # attributes
  int<lower=1> C; // # mixture components (2^K attribute patterns)
  int<lower=1> M; // # response categories per item
  array[I,J] int<lower=1, upper=M> y; // observed item responses
  // All attribute mastery patterns; allpat[c,k] is expected to be 0/1
  // (it is compared against 1 in transformed parameters).
  matrix[C,K] allpat;
  // Item-by-class quantity entering the category logits as eta[j,c];
  // presumably the DINA ideal-response matrix — confirm with the caller.
  matrix[J,C] eta;
} // NOTE(review): this closing brace was missing in the posted code —
  // without it the `parameters {` block that follows is a syntax error.
parameters {
// Higher-order structural parameters for the K attribute mastery logits:
// slope a[k] and intercept b[k] of inv_logit(b[k] + a[k] * theta[i]).
vector<lower=0, upper=5>[K] a;
vector<lower=-10, upper=10>[K] b;
// Person-level latent trait driving attribute mastery.
vector[I] theta;
// Per-item intercepts for the first three response categories
// (the fourth category's logit is fixed at 0 as the reference).
vector[J] f1;
vector[J] f2;
vector[J] f3;
// Per-item slopes on eta[j,c]; d1 is constrained positive —
// presumably for identifiability of the category ordering; confirm.
vector<lower=0>[J] d1;
vector[J] d2;
vector[J] d3;
}
transformed parameters {
// Log mixture weights: log_lambda[i,c] = log P(attribute pattern c | theta_i),
// assuming attributes are mastered independently given theta_i, each with
// probability inv_logit(b[k] + a[k] * theta[i]).
array[I, C] real log_lambda;
for (i in 1:I) {
for (c in 1:C) {
real temp_sum = 0;
for (k in 1:K) {
real logit_val = b[k] + a[k] * theta[i];
// allpat[c,k] == 1 means pattern c has mastered attribute k.
// NOTE(review): allpat is a real matrix compared to the integer 1;
// consider declaring it as array[C,K] int<lower=0,upper=1> instead.
if (allpat[c, k] == 1) {
temp_sum += log_inv_logit(logit_val);
} else {
temp_sum += log1m_inv_logit(logit_val);
}
}
log_lambda[i, c] = temp_sum;
}
}
// NOTE(review): the transformed parameters block is interrupted here by
// R code pasted into the post; it resumes with the sim_p computation below.
# NOTE(review): this R code appears to have been pasted into the middle of
# the Stan program (between the two halves of the transformed parameters
# block) — it is not valid Stan and belongs outside the model string.
mod <- cmdstan_model(
write_stan_file(stan_code),
cpp_options = list(
stan_threads = TRUE,
STAN_NO_RANGE_CHECKS = TRUE
)
)
# MAP
fit_vbmap <- mod$optimize(
data = sim_data,
threads = 1,
# NOTE(review): jacobian = TRUE makes optimize() include the
# change-of-variables adjustment, i.e. it finds the mode of the density
# MCMC samples on the unconstrained scale rather than the conventional
# MAP on the constrained scale — this alone can make the mode differ
# substantially from posterior summaries.
jacobian = T,
# NOTE(review): the optimize() call is truncated at this point in the post.
// (continuation of the transformed parameters block from above)
// Category-response probabilities: sim_p[c,j] is the simplex over the M
// response categories for item j within latent class c.
// NOTE(review): the four hard-coded logit entries assume M == 4 —
// TODO confirm, otherwise indexing logits[4] is out of range / incomplete.
array[C,J] simplex[M] sim_p;
for (c in 1:C) {
for (j in 1:J) {
vector[M] logits;
// Intercept + slope * eta[j,c] for each non-reference category.
logits[1] = f1[j] + d1[j] * eta[j,c];
logits[2] = f2[j] + d2[j] * eta[j,c];
logits[3] = f3[j] + d3[j] * eta[j,c];
logits[4] = 0; // reference category
sim_p[c,j] = softmax(logits);
}
}
}
model {
  // --- Priors (vectorized; identical to the original per-element loop) ---
  a ~ normal(1, 0.2);
  b ~ normal(0, 1.5);
  theta ~ normal(0, 1);
  f1 ~ normal(0, 1);
  f2 ~ normal(0, 1);
  f3 ~ normal(0, 1);
  d1 ~ normal(1, 0.5); // d1 has lower=0, so this is a truncated normal
  d2 ~ normal(0, 0.5);
  d3 ~ normal(0, 0.5);

  // --- Likelihood: marginalize over the C latent classes ---
  // Hoist the item-category log-probabilities out of the examinee loop:
  // log(sim_p[c,j]) does not depend on i, so compute it once (O(C*J*M))
  // instead of re-deriving it inside categorical_lpmf for every examinee
  // (O(I*C*J) log evaluations in the original).
  array[C, J] vector[M] log_p;
  for (c in 1:C) {
    for (j in 1:J) {
      log_p[c, j] = log(sim_p[c, j]);
    }
  }
  for (i in 1:I) {
    vector[C] log_probs;
    for (c in 1:C) {
      // Class log-weight plus the class-conditional response log-likelihood;
      // log_p[c,j][y[i,j]] equals categorical_lpmf(y[i,j] | sim_p[c,j]).
      real lp_c = log_lambda[i, c];
      for (j in 1:J) {
        lp_c += log_p[c, j][y[i, j]];
      }
      log_probs[c] = lp_c;
    }
    target += log_sum_exp(log_probs);
  }
}
generated quantities {
  // Copy of the class-membership log-weights, exported under a distinct
  // name. Whole-array declare-define replaces the original element loop.
  // NOTE(review): log_lambda is already a transformed parameter and is
  // saved in the output by default, so this copy may be redundant.
  array[I, C] real log_lambda_out = log_lambda;

  // Pointwise log-likelihood (one entry per examinee), e.g. for loo/WAIC.
  vector[I] log_lik;
  {
    // Hoist log(sim_p[c,j]) out of the examinee loop — it does not depend
    // on i (mirrors the same optimization in the model block).
    array[C, J] vector[M] log_p;
    for (c in 1:C) {
      for (j in 1:J) {
        log_p[c, j] = log(sim_p[c, j]);
      }
    }
    for (i in 1:I) {
      vector[C] log_probs;
      for (c in 1:C) {
        // log_p[c,j][y[i,j]] equals categorical_lpmf(y[i,j] | sim_p[c,j]).
        real lp_c = log_lambda[i, c];
        for (j in 1:J) {
          lp_c += log_p[c, j][y[i, j]];
        }
        log_probs[c] = lp_c;
      }
      log_lik[i] = log_sum_exp(log_probs);
    }
  }
}
"
And here is an example of MAP item parameter estimates:
a
A tibble: 5 × 2
variable estimate
1 a[1] 3.16
2 a[2] 2.37
3 a[3] 2.57
4 a[4] 3.33
5 a[5] 2.80
b
A tibble: 5 × 2
variable estimate
1 b[1] -1.60
2 b[2] 1.06
3 b[3] 0.966
4 b[4] 2.17
5 b[5] -0.255
Here are the true values:
b=c(-1,2,1.5,1,0)
a=c(1,1.5,2,2.5,3)
- Are MAP estimates expected to be unstable in latent-class mixture models like this?
- Is this mainly due to multi-modality in the posterior, or to the simulated data?
- Should I rely only on MCMC for this model?
- Any tips on priors or reparameterization to improve identifiability?
I appreciate any guidance — I’m still learning! 🙏
Thanks very much for your time.