Unrelated to the label switching, you might run into issues fitting this model when treating the population size N as continuous. To fit the same model that’s being fit in LCMCR you need to also marginalize out N, which is analytically possible (see table 1 of [2101.09304] Revisiting Identifying Assumptions for Population Size Estimation). Here’s code I’ve used previously that’s implemented the model from LCMCR with N marginalized out:
data {
// Data
int<lower=0> J; // Number of conflict data sources
int<lower=0> twoJ; // 2^J
int<lower=0> z[twoJ - 1]; // Observed overlap from conflict data sources
int<lower=0, upper=1> x[twoJ, J]; // Design matrix for linear model
int<lower=1> K_star;
// Prior hyperparmaters
real<lower=0> a;
real<lower=0> b;
}
parameters
{
real<lower=0> alpha;
matrix<lower=0, upper=1>[K_star, J] lambda;
real<lower=0, upper=1> V[K_star - 1];
}
transformed parameters{
real<upper=0> log_nu[K_star];
real<upper=0> log_pis[twoJ];
simplex[twoJ] pis;
real<upper=0> log_pis_temp[K_star];
// Cribbed from https://ecosang.github.io/blog/study/dirichlet-process-with-stan/
log_nu[1] = log(V[1]);
for(k in 2:(K_star - 1)){
log_nu[k] = log(V[k]) + log(1 - V[k - 1]) + log_nu[k - 1] - log(V[k - 1]);
}
log_nu[K_star] = log(1 - V[K_star - 1]) + log_nu[K_star - 1] - log(V[K_star - 1]);
for(i in 1:twoJ){
for(k in 1:K_star){
log_pis_temp[k] = log_nu[k] + bernoulli_lpmf(x[i, ] | lambda[k, ]);
}
log_pis[i] = log_sum_exp(log_pis_temp);
pis[i] = exp(log_pis[i]);
}
}
model
{
alpha ~ gamma(a, b);
for(k in 1:K_star){
lambda[k, ] ~ beta(1, 1);
}
V ~ beta(1, alpha);
for(i in 1:(twoJ - 1)){
target += z[i] * log_pis[i + 1];
}
target += -1 * sum(z) * log1m_exp(log_pis[1]);
}
generated quantities{
int<lower=0> z_0;
z_0 = neg_binomial_rng(sum(z), (1 - pis[1]) / pis[1]);
}
I believe what I’ve called x corresponds to what you call list_indicators.