Translation of an SEM with Ordered Categorical Variables from WinBUGS

Sorry, I thought I had the Lee book but can’t find it, so I am working off your description. I believe Lee fixes the thresholds, in which case it is fine to estimate psi; it is more common, though, to fix psi to 1 and estimate the thresholds. Below is a version of the model where the thresholds are estimated instead of psi. It could probably still be improved, but it may give you ideas about different ways to estimate the model. I also changed the priors a bit.
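
To spell out the identification point as I understand it (please double-check against Lee), the ordinal probit piece for item j is

Pr(z[i,j] = c) = Phi((thd[j,c] - mu[i,j]) / psi_j) - Phi((thd[j,c-1] - mu[i,j]) / psi_j),

with thd[j,0] = -infinity and thd[j,5] = +infinity. The thresholds and the scale psi_j are only identified relative to one another, so you can either fix the thresholds and estimate psi_j (Lee's setup, as I read your description) or fix psi_j = 1 and estimate the thresholds, which is what the model below does.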

data{
  int<lower=0> N; // number of observations
  int<lower=1> P; // number of variables
  array[N, P] int<lower=1, upper=5> z; // observed data
}
transformed data {
  cov_matrix[4] R = diag_matrix(rep_vector(1.0, 4)); // scale matrix for the inverse-Wishart prior on phi
  vector[4] u = rep_vector(0, 4); // mean for xi
  array[N, P] int ones; // all 1s, used with bernoulli() so each observation contributes the log of its category probability

  for (i in 1:N) for (j in 1:P) ones[i,j] = 1;
}
parameters {
  vector[21] lam; // pattern coefficients
  row_vector[4] gam; // structural coefficients of eta on xi
  real<lower=0> psd; // precision of eta
  cov_matrix[4] phi; // covariance matrix of xi
  array[N] vector[4] xi; // latent variables
  vector[N] eta_std; // standardized eta (non-centered parameterization)
  array[P] ordered[4] thd; // thresholds for each item
}
transformed parameters {
  array[N] vector[P] L; // lower threshold for each observed category
  array[N] vector[P] U; // upper threshold for each observed category
  array[N] vector[P] mu; // mean of the latent response underlying z
  matrix[4, 4] phx = cholesky_decompose(phi); // Cholesky factor of phi
  vector[N] eta; // endogenous latent variable
  real sgd = pow(psd, -.5); // sd of eta implied by the precision psd
  
  for(i in 1:N){
    eta[i] = sgd * eta_std[i] + gam * xi[i]; // structural equation (non-centered)
    for(j in 1:P){
      if (z[i,j] == 1) {
        L[i,j] = negative_infinity();
      } else {
        L[i,j] = thd[j, z[i, j] - 1];
      }
      if (z[i,j] == 5) {
        U[i,j] = positive_infinity();
      } else {
        U[i,j] = thd[j, z[i, j]];
      }
    }
    
    // measurement equation model; matrices would make this more concise (a sketch follows the program):
    mu[i,1] = eta[i];
    mu[i,2] = lam[1] * eta[i];
    mu[i,3] = xi[i, 1];
    mu[i,4] = lam[2] * xi[i, 1];
    mu[i,5] = lam[3] * xi[i, 1];
    mu[i,6] = lam[4] * xi[i, 1];
    mu[i,7] = lam[5] * xi[i, 1];
    mu[i,8] = lam[6] * xi[i, 1];
    mu[i,9] = lam[7] * xi[i, 1];
    mu[i,10] = xi[i, 2];
    mu[i,11] = lam[8] * xi[i, 2];
    mu[i,12] = lam[9] * xi[i, 2];
    mu[i,13] = lam[10] * xi[i, 2];
    mu[i,14] = lam[11] * xi[i, 2];
    mu[i,15] = lam[12] * xi[i, 2];
    mu[i,16] = xi[i, 3];
    mu[i,17] = lam[13] * xi[i, 3];
    mu[i,18] = lam[14] * xi[i, 3];
    mu[i,19] = xi[i, 4];
    mu[i,20] = lam[15] * xi[i, 4];
    mu[i,21] = lam[16] * xi[i, 4];
    mu[i,22] = lam[17] * xi[i, 4];
    mu[i,23] = lam[18] * xi[i, 4];
    mu[i,24] = lam[19] * xi[i, 4];
    mu[i,25] = lam[20] * xi[i, 4];
    mu[i,26] = lam[21] * xi[i, 4];
  }
}
model {
  for(i in 1:N){
    xi[i] ~ multi_normal_cholesky(u, phx);

    eta_std[i] ~ std_normal();

    for(j in 1:P){
      // Phi_approx() leads to a non-finite gradient here:
      ones[i,j] ~ bernoulli(Phi(U[i,j] - mu[i,j]) - Phi(L[i,j] - mu[i,j]));
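      // (untested idea, not from Lee: working on the log scale might be more
      // stable than differencing Phi() directly, e.g.
      // target += log_diff_exp(std_normal_lcdf(U[i,j] - mu[i,j]),
      //                        std_normal_lcdf(L[i,j] - mu[i,j])); )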
    }
  } // end of i
    
  //priors on loadings and coefficients
  lam ~ normal(1, .4); // (what you call psi is fixed to 1 for these models)
  psd ~ gamma(1, .5); // prior on eta precision
  gam ~ normal(0.6, 1);
  phi ~ inv_wishart(5, R); // prior on xi covariance matrix
}
generated quantities {
  array[N] real dthat; // structural residual: eta minus its predicted value

  for (i in 1:N) {
    dthat[i] = eta[i] - gam * xi[i];
  }
}
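
On the "matrices would make this more concise" comment: below is a rough, untested sketch of what I mean, assuming P = 26 and the same item ordering and marker items as the explicit assignments above (the fac and marker arrays are mine, not from Lee). It would replace parts of the transformed data and transformed parameters blocks; everything else stays the same.

transformed data {
  // ... keep R, u, and ones as above ...
  // factor that each of the 26 items loads on: 1 = eta, 2-5 = xi[1]-xi[4]
  array[26] int fac = {1, 1,
                       2, 2, 2, 2, 2, 2, 2,
                       3, 3, 3, 3, 3, 3,
                       4, 4, 4,
                       5, 5, 5, 5, 5, 5, 5, 5};
  // marker items whose loading is fixed to 1 (the first item of each factor)
  array[26] int marker = {1, 0,
                          1, 0, 0, 0, 0, 0, 0,
                          1, 0, 0, 0, 0, 0,
                          1, 0, 0,
                          1, 0, 0, 0, 0, 0, 0, 0};
}
transformed parameters {
  // ... keep the declarations of L, U, mu, phx, eta, and sgd as above ...
  matrix[P, 5] Lam = rep_matrix(0, P, 5); // loading matrix; column 1 is for eta
  {
    int k = 1; // walks through lam in the same order as the explicit assignments
    for (j in 1:P) {
      if (marker[j] == 1) {
        Lam[j, fac[j]] = 1; // marker loading fixed to 1
      } else {
        Lam[j, fac[j]] = lam[k];
        k += 1;
      }
    }
  }
  // after the existing loop over i that fills eta, L, and U, the 26 explicit
  // mu[i, ] assignments collapse to a single matrix-vector product:
  for (i in 1:N) {
    mu[i] = Lam * append_row(eta[i], xi[i]);
  }
}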

And here are some inits for the model:

z <- data$z

inits <- list(lam = array(1, 21), gam = array(0, 4), psd = 1, phi = diag(1, 4),
              xi = array(.1, c(nrow(z), 4)), eta_std = array(.1, nrow(z)),
              thd = t(array(seq(-1, 1, length.out = 4), c(4, ncol(z)))))
inits <- lapply(1:4, function(x) inits) # same inits for each of the 4 chains
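
In case it is useful, this is roughly how I would pass the data and inits in, assuming rstan and that the program above is saved as sem_ordinal.stan (a placeholder file name); cmdstanr takes the inits in the same list-of-lists form.

library(rstan)

standata <- list(N = nrow(z), P = ncol(z), z = z)

fit <- stan(file = "sem_ordinal.stan", data = standata,
            init = inits, chains = 4)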