Mixture-Model Inference: how to report hidden states, rather than state proportions?

Sure — if you know the true mixture component for some z[i], you can use those observed values in the likelihood and generated quantities, rather than summing over all of the mixture components. I usually do a little hack like this, where you add some data Z_obs and treat 0 as unknown/unobserved:

data {
   int<lower = 1> K; 		// number of mixture components
   int<lower = 1> N; 		// number of data points
   int y[N]; 			// count observations (Poisson outcomes)
   int<lower = 0, upper = K> Z_obs[N]; // partially observed component labels; 0 represents NA (unknown Z values)
}

transformed data {
  // Indicator per observation: 1 when its component label was supplied in
  // Z_obs (any value in 1..K), 0 when the label is missing (coded as 0).
  int<lower = 0, upper = 1> Z_known[N];

  for (n in 1:N) {
    if (Z_obs[n] > 0) {
      Z_known[n] = 1;
    } else {
      Z_known[n] = 0;
    }
  }
}

parameters {
  simplex[K] p;       		// mixture proportions (non-negative, sum to 1)
  ordered[K] lambda;  		// rates of mixture components; ordered to break label-switching non-identifiability
}

transformed parameters {
  // Log mixture weights, computed once per iteration and reused in both the
  // model block and generated quantities.
  vector[K] log_p = log(p);
}

model {
  // Very diffuse prior on the component rates (as in the original example).
  lambda ~ lognormal(5, 8);

  for (n in 1:N) {
    if (Z_known[n]) {
      // Label observed: complete-data contribution
      // log p(z_n) + log p(y_n | lambda_{z_n}).
      target += log_p[Z_obs[n]] + poisson_lpmf(y[n] | lambda[Z_obs[n]]);
    } else {
      // Label missing: marginalize z_n over all K components on the log scale.
      vector[K] comp_lp;
      for (k in 1:K)
        comp_lp[k] = log_p[k] + poisson_lpmf(y[n] | lambda[k]);
      target += log_sum_exp(comp_lp);
    }
  }
}

generated quantities {
  // Per-observation component assignment: the observed label when known,
  // otherwise a posterior draw of z_n given y_n, p, and lambda.
  int<lower = 1, upper = K> z[N];

  {
    vector[K] comp_lp;
    for (n in 1:N) {
      if (Z_known[n]) {
        z[n] = Z_obs[n];
        continue;
      }
      for (k in 1:K)
        comp_lp[k] = log_p[k] + poisson_lpmf(y[n] | lambda[k]);
      // categorical_logit_rng normalizes the unnormalized log probabilities,
      // so no explicit softmax is needed here.
      z[n] = categorical_logit_rng(comp_lp);
    }
  }
}

5 Likes