Sure — if you know the true mixture component for some of the z[i],
you can use those observed values directly in the likelihood and generated quantities, rather than summing over all of the mixture components. I usually use a little hack like this, where you add some data Z_obs
and treat 0 as unknown/unobserved:
data {
int<lower = 1> K; // number of mixture components
int<lower = 1> N; // number of data points
int y[N]; // observations (Poisson counts, per the model block)
int<lower = 0, upper = K> Z_obs[N]; // observed component labels; 0 represents NA (unknown Z values)
}
transformed data {
  // Indicator: 1 when the component label of point n was observed, 0 otherwise.
  int<lower = 0, upper = 1> Z_known[N];
  for (n in 1:N) {
    Z_known[n] = Z_obs[n] != 0;
  }
}
parameters {
simplex[K] p; // mixture proportions (non-negative, sum to 1 by construction)
ordered[K] lambda; // rates of mixture components (ordered, presumably to prevent label switching — standard mixture identifiability device)
}
transformed parameters {
// Cache log(p) once per iteration; reused in both the model block and generated quantities.
vector[K] log_p = log(p);
}
model {
  // Prior on the ordered Poisson rates.
  lambda ~ lognormal(5, 8);

  for (n in 1:N) {
    if (Z_known[n]) {
      // Component membership observed: complete-data likelihood for point n,
      // i.e. the joint log density of (z[n] = Z_obs[n], y[n]).
      target += log_p[Z_obs[n]] + poisson_lpmf(y[n] | lambda[Z_obs[n]]);
    } else {
      // Membership unobserved: marginalize the discrete label out by
      // log-sum-exp over the K component joint log densities.
      vector[K] comp_lp;
      for (k in 1:K) {
        comp_lp[k] = log_p[k] + poisson_lpmf(y[n] | lambda[k]);
      }
      target += log_sum_exp(comp_lp);
    }
  }
}
generated quantities {
  // Posterior draw of each point's component label: copied through when
  // observed, sampled from its conditional posterior when not.
  int<lower = 1, upper = K> z[N];
  {
    vector[K] comp_lp;
    for (n in 1:N) {
      if (!Z_known[n]) {
        // Unnormalized log posterior over labels for point n;
        // categorical_logit_rng normalizes internally.
        for (k in 1:K) {
          comp_lp[k] = log_p[k] + poisson_lpmf(y[n] | lambda[k]);
        }
        z[n] = categorical_logit_rng(comp_lp);
      } else {
        z[n] = Z_obs[n];
      }
    }
  }
}