Hello I’m building a hierarchical DirichletMultinomial model to predict budget allocation splits across K alternatives.
The allocation split is a function of the total budget level, and this varies by several categories in a hierarchical structure (for example country, sector).
Currently the model runs ok with the betas and concentration parameter (kappa) varying by country (M parameter below), but I want to add more layers to the hierarchy (e.g. by country, by sector).
My questions are:

How can I optimise the current model model? Is this the best way to structure it for efficient sampling?

How can I extend the model hierarchy to include variation at additional multiple levels (e.g. by country and by sector)? It feels like a similar problem to the ‘seemingly unrelated regression’ in section 5.15 (Multivariate Outcomes) in the Stan manual. I’ve made a start but not sure how to handle the parameter correlations
All help and ideas much appreciated!
// ======================================================================
// MULTINOMIAL MODEL
// SPEND DATA
// ======================================================================
functions {
// for likelihood estimation
real dirichlet_multinomial_lpmf(int[] y, vector alpha) {
real alpha_plus = sum(alpha);
return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y)))
 lgamma(alpha_plus+sum(y))  sum(lgamma(alpha));
}
}
data {
int<lower = 1> K; // num alternatives
int<lower = 1> N; // num observations
int<lower = 1> M; // num countries
int<lower = 0> y[N, K]; // allocation on each choice
int<lower = 1> market[N]; // markets
int<lower = 0> budget[N]; // total spend
vector[N] log_budget; // log spend
int<lower = 1> N_new; // new num observations
int<lower = 1> budget_new[N_new]; // new budget for simulation
vector[N_new] log_budget_new; // log new budget
}
parameters {
matrix<lower = 0>[M,K] beta_0;
matrix<lower = 0>[M,K] beta_1;
vector<lower = 1>[M] kappa;
vector<lower = 0>[K] mu_beta_0;
vector<lower = 0>[K] mu_beta_1;
vector<lower = 0>[M] mu_kappa;
vector<lower = 0>[K] sigma_beta_0;
vector<lower = 0>[K] sigma_beta_1;
vector<lower = 0>[M] sigma_kappa;
}
transformed parameters {
simplex[K] theta[N];
vector[K] x_beta[N];
// create XB index
{
for (n in 1:N) {
for(k in 1:K){
x_beta[n,k] = beta_0[market[n],k] + beta_1[market[n],k]*log_budget[n];
}
theta[n] = softmax(x_beta[n]);
}
}
}
model {
// priors
mu_beta_0 ~ lognormal(0,0.1);
mu_beta_1 ~ lognormal(0,0.1);
mu_kappa ~ lognormal(0,1);
sigma_beta_0 ~ lognormal(0,0.1);
sigma_beta_1 ~ lognormal(0,0.1);
sigma_kappa ~ lognormal(0,0.1);
for (m in 1:M){
to_vector(beta_0[m]) ~ gamma(mu_beta_0,sigma_beta_0);
to_vector(beta_1[m]) ~ gamma(mu_beta_1,sigma_beta_1);
to_vector(kappa) ~ lognormal(mu_kappa,sigma_kappa);
}
for (n in 1:N) {
y[n] ~ dirichlet_multinomial(kappa[market[n]]*theta[n]);
}
}
generated quantities {
int<lower=0> channel_sim[M,N_new,K];
vector[K] theta_new[M,N_new];
{
vector[K] alpha_new[N_new];
int channel_sim_tmp[N_new,K];
for (m in 1:M){
for (n in 1:N_new){
for(k in 1:K){
alpha_new[n,k] = beta_0[m,k] + beta_1[m,k]*log_budget_new[n];
}
theta_new[m,n] = kappa[m]*softmax(to_vector(alpha_new[n]));
channel_sim_tmp[n] = dirichlet_multinomial_rng(theta_new[m,n],budget_new[n]);
}
channel_sim[m] = channel_sim_tmp;
}
}
}