How do I limit the sum of the elements in a vector to less than a constant

Well I don’t think you need both, and I think there’s a simpler way of doing the softmax thing. See if this makes sense:

data {
  int I;                   //3
  int T;                  //18
  int J;                  //43
  int<lower=0> y[T, J, I];
  real log_max;
}

parameters {
  vector[I-1] log_A[T, J];
  vector<lower = 0>[I - 1] sigma;
  vector[I - 1] U_z[J];
  vector[I - 1] inter_z[J];
  vector[I - 1] intra_z[J];
  real mu_U;
  real mu_inter;
  real mu_intra;
  real<lower = 0> sigma_U;
  real<lower = 0> sigma_inter;
  real<lower = 0> sigma_intra;
}

transformed parameters {
  matrix[2, 2] B[J];
  vector[I - 1] U[J];
  vector[I - 1] inter[J];
  vector[I - 1] intra[J];

  for (j in 1:J) {
    B[j, 1, 1] = intra[j, 1];
    B[j, 2, 2] = intra[j, 2];
    B[j, 1, 2] = inter[j, 1];
    B[j, 2, 1] = inter[j, 2];
  }

  for (j in 1:J) {
    inter[j] = mu_inter + inter_z * sigma_inter;
    intra[j] = mu_intra + intra_z * sigma_intra;
    U[j] = mu_U + U_z * sigma_U;
  }
}

model {
  mu_inter ~ normal(0, 10);
  mu_intra ~ normal(0, 10);
  
  // I'm just gonna turn these into normals cause I can
  sigma_inter ~ normal(0, 5);
  sigma_intra ~ normal(0, 5);
  sigma[1] ~ normal(0, 5);
  sigma[2] ~ normal(0, 5);

  // non-centered prior on hierarchical parameters: https://mc-stan.org/users/documentation/case-studies/divergences_and_bias.html
  for (j in 1:J) {
    inter_z[j] ~ normal(0, 1);
    intra_z[j] ~ normal(0, 1);
    U_z[j] ~ normal(0, 1);

    // Should have some sort of prior on the initial conditions more specific that a wide uniform
    log_A[1, j] ~ normal(0, 10);
  }


  for (t in 2:T) {
    for (j in 1:J) {
      vector[I - 1] mu = U[j] + B[j] * log_A[t - 1, j];

      log_A[t, j] ~ normal(mu, sigma);
    }
  }

  for (t in 1:T) {
    for (j in 1:J) {
      y[t, j,] ~ multinomial(softmax([log_A[t, j, 1], log_A[t, j, 2], 0]'));
    }
  }
}

I didn’t check that that compiles so there are probably bugs.