Bayesian Hierarchical Latent Mixture Model

I am trying to fit the model below, in which an adapted latent Dirichlet allocation model is used to identify latent types of individuals, as captured in theta. For each of these individuals we observe a set of rankings from an experiment, and we fit a Plackett-Luce (exploded-logit) model to decompose these rankings into the importance of (a) the types, (b) a set of observable characteristics X, and (c) an individual mean (gamma).

The index array contains an ID for each individual from 1:N, and the ranking array contains the assessor's ranking of up to three individuals.
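For intuition, here is a minimal sketch of the exploded-logit likelihood I have in mind (plain Python, hypothetical utilities): a ranking is "exploded" into a sequence of softmax choices, each over the alternatives not yet ranked.

```python
import math

def plackett_luce_loglik(utilities, ranking):
    """Log-likelihood of one ranking under the Plackett-Luce (exploded-logit) model.

    utilities: dict mapping individual id -> latent utility (toy values here)
    ranking: list of ids, best first
    Each stage is a softmax choice among the alternatives still remaining.
    """
    remaining = list(ranking)
    ll = 0.0
    for chosen in ranking[:-1]:  # the final stage is deterministic
        denom = math.log(sum(math.exp(utilities[i]) for i in remaining))
        ll += utilities[chosen] - denom
        remaining.remove(chosen)
    return ll

# With three equal utilities every ranking has probability 1/3 * 1/2 = 1/6.
```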

I am currently estimating this model on simulated data, and have run into some technical and conceptual issues:

  • I ordered one of the type characteristics to ensure the model is identifiable; however, the model can now converge to a point where this constraint is binding. Is it kosher to simply swap the type labels after warm-up if this ordering constraint binds? Can this be done directly using the fit object and then re-entered as a new initial value?
  • In trying to estimate alphaVar I worry that I run into a problem similar to Neal's funnel: if I include it in the estimation I get an E-BFMI < 0.2. How might I reparameterise my model to avoid this?
  • Does the code generally look sensible and efficient? This is my first Stan model, so I am still trying to figure out best practices.

I can share simulated data and the R file I use to run this code if helpful.

data {
  int<lower=1> N; // Number of documents
  int<lower=1> K; // Number of types
  int<lower=1> E; // Number of enumerators
  int<lower=1> V; // Number of vignettes
  array[N, V, E] int<lower=1> yAction; // Data on Action
  array[N, V, E] int<lower=1> yTone; // Data on Tone
  array[N, V, E] int<lower=1> yAuthority; // Data on Authority
  array[N, V, E] int<lower=1> yJustification; // Data on Justification
  vector<lower=0>[K] alphaTopic;     // topic prior
  vector<lower=0>[2] etaAction;      // word prior
  vector<lower=0>[4] etaFour;      // word prior
// Stan model for Plackett-Luce
  int<lower=1> F;
  int<lower=1> Q; // number of alternative PL
  int<lower=1> S;
  int<lower=1> Dx;
  array[N] vector[Dx] X;
  array[F, Q, V, S] int index; // Can I incorporate the ranking here?
  array[F, Q, V, S] int ranking;
}
parameters {
  ordered[K] phiInvActionSmall; // phi for action - attempt to make unique labels
  array[K] simplex[4] phiTone; // phi for action
  array[K] simplex[4] phiAuthority; // phi for action
  array[K] simplex[4] phiJustification; // phi for action
  array[V, E] vector[1] gammaActionSub;
  array[V, E] vector[3] gammaToneSub;
  array[V, E] vector[3] gammaAuthoritySub;
  array[V, E] vector[3] gammaJustificationSub;
  array[N] simplex[K] theta;
// PL Model: 
   // Individual level parameters
  array[F, S] vector[Dx] alpha_fs;
  array[F, S] vector[K-1] beta_fs;
  array[N%/%3*2] real gamma; // Individual fixed effects, needs to be smaller for identification
  // Hyperparameters
  array[S] vector[Dx] alpha;
  array[S] vector[K-1] beta;
  // Variances
  // real<lower=0.01> alphaVar;
  // real<lower=0.01> betaVar;
  // real<lower=0.01> gammaVar;
}
transformed parameters {
  array[K] vector[2] phiInvAction;
  array[K] vector[4] phiInvTone;
  array[K] vector[4] phiInvJustification;
  array[K] vector[4] phiInvAuthority;
  for (k in 1:K) {
    phiInvAction[k, 1] = 0; 
    phiInvAction[k, 2] = phiInvActionSmall[k]; 
    // phiInvAction[k] = phiAction[k]./(phiAction[k,1]);
    phiInvTone[k] = log(phiTone[k] ./ phiTone[k,1]);
    phiInvJustification[k] = log(phiJustification[k] ./ phiJustification[k,1]);
    phiInvAuthority[k] = log(phiAuthority[k] ./ phiAuthority[k,1]);
  }
}
model {
// Dirichlet Model
  // Note: Model is identified up to type labels
    // Ensure identifiability how???
    // For now: Gamma = 0 for first choice option AND first vignette
    array[V, E] vector[2] gammaAction;
    array[V, E] vector[4] gammaTone;
    array[V, E] vector[4] gammaAuthority;
    array[V, E] vector[4] gammaJustification;
    // This is too strict a constraint, but unsure how to do correctly
    // It should be identified like this so will update later
    for (e in 1:E) {
      for (v in 1:V) {
        gammaAction[v,e] = append_row(0 , gammaActionSub[v,e]);
        gammaTone[v,e] = append_row(0 , gammaToneSub[v,e]);
        gammaAuthority[v,e] = append_row(0 , gammaAuthoritySub[v,e]);
        gammaJustification[v,e] = append_row(0, gammaJustificationSub[v,e]);
      }
    }
  // Main log-likelihood DGP:
  for (n in 1:N) {
    for (e in 1:E) {
      for (v in 1:V) {
        array[K] real gammaDir;
        for (k in 1:K) {
          gammaDir[k] = log(theta[n, k]) + categorical_logit_lpmf(yAction[n,v,e] | (phiInvAction[k]+gammaAction[v, e])) +
          categorical_logit_lpmf(yTone[n,v,e] | (phiInvTone[k]+gammaTone[v, e])) +
          categorical_logit_lpmf(yAuthority[n,v,e] | (phiInvAuthority[k]+gammaAuthority[v, e])) +
          categorical_logit_lpmf(yJustification[n,v,e] | (phiInvJustification[k]+gammaJustification[v, e]));
        }
        target += log_sum_exp(gammaDir); // likelihood
      }
    }
  }
  // Impose priors:
  array[K] vector[2] phiAction;
  for (k in 1:K) {
    phiAction[k] = softmax(phiInvAction[k]);
    phiAction[k] ~ dirichlet(etaAction); // prior beta
    phiTone[k] ~ dirichlet(etaFour); // prior beta
    phiAuthority[k] ~ dirichlet(etaFour); // prior beta
    phiJustification[k] ~ dirichlet(etaFour); // prior beta
  }
  for (n in 1:N) {
    theta[n] ~ dirichlet(alphaTopic); // prior theta
  }
  for (e in 1:E) {
    for (v in 1:V) {
      gammaAction[v,e] ~ normal(0, 1);
      gammaTone[v,e] ~ normal(0, 1);
      gammaJustification[v,e] ~ normal(0, 1);
      gammaAuthority[v,e] ~ normal(0, 1);
    }
  }
// PL Model:
  array[N] real gammaFull;
  // Populating the gamma array, including zeros for identification
  for (i in 1:N %/% 3) {
      // For every triplet the first element is zero.
      gammaFull[(i-1)*3+1] = 0;
    for (n in 2:3) {
      // For every triplet the second and third element are non-zero.
      gammaFull[(i-1)*3+n] = gamma[(i-1)*2+n-1];
    }
  }
  for (f in 1:F) {
    for (v in 1:V) {
      for (s in 1:S) {
        //if (index[f, 1, v, s] < 0); { // This will be flagged if no assessments
        //  continue;
        //int numOptions = 2; // This will capture judges who only assessed two candidates.
        //if (index[f,Q,v,s] > 0) {
        int numOptions = 3;
        vector[numOptions] U;
        for (q in 1:numOptions) {
          // TS Note: Removing the theta * beta term here does not solve issues around
          // E-BFMI and actually introduces further abnormalities.
          U[ranking[f,q,v,s]] = theta[index[f,q,v,s], 2:]' * beta_fs[f,s] + X[index[f,q,v,s]]' * alpha_fs[f,s] + gammaFull[index[f,q,v,s]];
        }
        for (q in 1:numOptions) {
          target += U[q] - log_sum_exp(U[q:]);
          U[q] = 0;
        }
      }
    }
  }
  // Adding the prior distributions:
  for (f in 1:F) {
    for (s in 1:S) {
      // alpha_fs[f,s] ~ normal(alpha[s], alphaVar);
      // beta_fs[f,s] ~ normal(beta[s], betaVar);
      alpha_fs[f,s] ~ normal(alpha[s], 1);
      beta_fs[f,s] ~ normal(beta[s], 1);
    }
  }
  // for (n in 1:N %/% 3 * 2) {
  //   gamma[n] ~ normal(0, gammaVar);
  // }
  gamma ~ normal(0, 1);
  for (s in 1:S) {
    alpha[s] ~ normal(0, 3);
    beta[s] ~ normal(0, 3);
  }
  // alphaVar ~ inv_gamma(1, 1);
  // betaVar ~ inv_gamma(1, 1);
  // gammaVar ~ inv_gamma(1, 1);
}

I’m not sure what you mean by the constraint binding, but if it’s just label switching, then yes. I wrote up a bit about non-identifiability of these mixtures in the User’s Guide and suggested one stick to inferences that do not depend on the labels. Then you don’t need to worry about it. Trying to directly compare fits of LDA-like models across chains is nearly impossible.
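To see the invariance concretely, here's a toy sketch (plain Python, made-up numbers): permuting the type labels of theta and the per-type outcome probabilities phi together leaves the marginal likelihood unchanged, which is why any inference that depends on the labels is fragile.

```python
import math

def mixture_loglik(theta, phi, y):
    """Marginal log-likelihood of one categorical observation y under a
    K-component mixture: log sum_k theta[k] * phi[k][y]."""
    terms = [math.log(t) + math.log(p[y]) for t, p in zip(theta, phi)]
    m = max(terms)
    return m + math.log(sum(math.exp(v - m) for v in terms))

theta = [0.3, 0.7]              # toy mixing weights
phi = [[0.9, 0.1], [0.2, 0.8]]  # toy per-type outcome probabilities
perm = [1, 0]                   # swap the two type labels
theta_swapped = [theta[k] for k in perm]
phi_swapped = [phi[k] for k in perm]
```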

Yes, the commented-out versions are the centered parameterization and will cause a problem as written. The easiest thing to do, given that alphaVar is a scalar, is to declare

array[F, S] vector<multiplier = alphaVar>[Dx] alpha_fs;

This will convert to a non-centered parameterization automatically under the hood. You could also shift all these with an offset, but that’s harder given that you’re shifting rows of the structure. So you’d probably have to do the non-centering manually, which isn’t worth it—the scale non-centering is the important part.
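The underlying identity, for a scalar scale: the centered density normal(x | mu, sigma) equals the standard-normal density of z = (x - mu) / sigma minus the Jacobian term log sigma, so sampling z and reconstructing x decouples the parameter from its scale. A quick numerical check (plain Python, arbitrary toy values):

```python
import math

def normal_lpdf(x, mu, sigma):
    """Log density of a normal distribution."""
    return -0.5 * math.log(2 * math.pi) - math.log(sigma) \
        - 0.5 * ((x - mu) / sigma) ** 2

# Centered:      alpha_fs ~ normal(alpha, alphaVar)
# Non-centered:  alpha_raw ~ normal(0, 1); alpha_fs = alpha + alphaVar * alpha_raw
# The two agree up to the log(alphaVar) Jacobian term, which is what
# <multiplier = alphaVar> accounts for automatically.
x, mu, sigma = 1.7, 0.4, 2.5  # arbitrary toy values
z = (x - mu) / sigma
centered = normal_lpdf(x, mu, sigma)
noncentered = normal_lpdf(z, 0.0, 1.0) - math.log(sigma)
```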

You might want to check out the User’s Guide chapter on efficiency. The most important thing to do here is vectorize all your operations to speed up autodiff. There are lots of places you can do this.

For example,

phiInvAction[ , 1] = rep_vector(0, K);
phiInvAction[ , 2] = phiInvActionSmall;

You can do the same thing for gammaFull by assigning whole rows at a time.

I’m pretty sure you can fix all those gammaX assignments with append_row to just add that row all at once.

Then you also want to vectorize distributions, e.g.,

for (f in 1:F) {
  alpha_fs[f] ~ normal(alpha, 1);
}

Using arrays of vectors, there’s not an easy way to vectorize alpha ~ normal(0, 3);

Thank you so much for your careful response. Yes, it was just label switching: the model still tried to converge to a different labelling than the one implied by the ordering.

I will have another careful look at the User's Guide and implement these suggestions.