Well I don’t think you need both, and I think there’s a simpler way of doing the softmax thing. See if this makes sense:
// Observed counts and problem dimensions for the multinomial state-space model.
data {
int I; //3   // number of categories per multinomial draw (category I is the reference)
int T; //18  // number of time points
int J; //43  // number of groups/sites
int<lower=0> y[T, J, I]; // counts: y[t, j] is one length-I multinomial observation
real log_max; // NOTE(review): declared but never used anywhere in this program
}
// Sampled quantities. The *_z vectors are standard-normal offsets for the
// non-centered parameterization expanded in transformed parameters
// (effect[j] = mu + z[j] * sd).
parameters {
vector[I-1] log_A[T, J]; // latent log-abundances relative to reference category I
vector<lower = 0>[I - 1] sigma; // process-noise sd of the log_A dynamics
vector[I - 1] U_z[J]; // non-centered offsets for group-level intercepts U
vector[I - 1] inter_z[J]; // non-centered offsets for off-diagonal (inter) interactions
vector[I - 1] intra_z[J]; // non-centered offsets for diagonal (intra) interactions
real mu_U; // population mean of U
real mu_inter; // population mean of inter
real mu_intra; // population mean of intra
real<lower = 0> sigma_U; // population sd of U
real<lower = 0> sigma_inter; // population sd of inter
real<lower = 0> sigma_intra; // population sd of intra
}
transformed parameters {
// Group-level effects expanded from the non-centered offsets:
// effect[j] = mu + z[j] * sd. B[j] is the 2x2 interaction matrix
// (this fill pattern assumes I == 3, i.e. I - 1 == 2).
matrix[2, 2] B[J];
vector[I - 1] U[J];
vector[I - 1] inter[J];
vector[I - 1] intra[J];
// Compute inter/intra/U BEFORE filling B: Stan executes transformed-parameter
// statements in order, so the original code read B's inputs uninitialized.
for (j in 1:J) {
inter[j] = mu_inter + inter_z[j] * sigma_inter; // bug fix: was inter_z (whole array), needs [j]
intra[j] = mu_intra + intra_z[j] * sigma_intra; // bug fix: was intra_z, needs [j]
U[j] = mu_U + U_z[j] * sigma_U;                 // bug fix: was U_z, needs [j]
}
for (j in 1:J) {
B[j, 1, 1] = intra[j, 1]; // within-category (density-dependent) terms on the diagonal
B[j, 2, 2] = intra[j, 2];
B[j, 1, 2] = inter[j, 1]; // cross-category terms off the diagonal
B[j, 2, 1] = inter[j, 2];
}
}
model {
// Priors on population-level means and scales
// (the <lower = 0> constraint makes the normal scales half-normals).
mu_inter ~ normal(0, 10);
mu_intra ~ normal(0, 10);
mu_U ~ normal(0, 10); // added: was implicitly flat, inconsistent with mu_inter/mu_intra
sigma_inter ~ normal(0, 5);
sigma_intra ~ normal(0, 5);
sigma_U ~ normal(0, 5); // added: was implicitly flat, inconsistent with the other scales
sigma ~ normal(0, 5); // vectorized: element-wise normal(0, 5), works for any I
// non-centered prior on hierarchical parameters:
// https://mc-stan.org/users/documentation/case-studies/divergences_and_bias.html
for (j in 1:J) {
inter_z[j] ~ normal(0, 1);
intra_z[j] ~ normal(0, 1);
U_z[j] ~ normal(0, 1);
// TODO: a prior on the initial conditions more specific than a wide normal
log_A[1, j] ~ normal(0, 10);
}
// Latent dynamics: log_A[t] = U + B * log_A[t-1] + noise, per group.
for (t in 2:T) {
for (j in 1:J) {
log_A[t, j] ~ normal(U[j] + B[j] * log_A[t - 1, j], sigma);
}
}
// Observation model: category I is the reference, its logit pinned at 0.
// append_row generalizes the softmax argument to any I (not just I == 3).
for (t in 1:T) {
for (j in 1:J) {
y[t, j] ~ multinomial(softmax(append_row(log_A[t, j], 0)));
}
}
}