Rstan convergence issue - mediation model with latent variables

I have a simple mediation model. The predictor, mediator, and outcome are all latent variables with three indicator. Unit loading identification is used for all three. The traceplots come out completely separate for the three chains (for all parameters). I have the stan code and the r code to generate the data and run the stan code below.

Stan code



data{
int <lower=0> N; // sample size
int <lower=0> P; // number of variables
matrix[N, P] Y; // data matrix Y with N rows and P columns
int <lower=0> D; // number of total latent variables
int <lower=0> K; // number of explanatory latent variables (ksi)
int <lower=0> E; // number of outcome latent variables (eta)

}

parameters{
vector[P] intercep_indicators; // intercepts of observed variables
vector[P-D] lam; // factor loadings
vector[3] beta; // structural regressions
vector[N] FS_Ksi; // factor scores of explanatory (exogenous) latent variables
matrix[N,E] FS_ETA; // factor scores of endogenous vars

vector<lower=0>[P] sd_indicators; // variance for observed variables
vector<lower=0>[E] sd_eta1; 
vector<lower=0>[E] sd_eta2; 
vector<lower=0>[K] sd_ksi; // variance for exogenous var ksi
}

transformed parameters{
matrix[N, P] indicator_pred; // predicted values of observed variables - indicators
vector[N] eta1_pred; // predicted values of eta1 - M 
vector[N] eta2_pred; // added predicted values of eta2 - Y


//measurment model
  for(i in 1:N){
    
    //ksi = X
    indicator_pred[i,1] = intercep_indicators[1] + FS_Ksi[i];
    indicator_pred[i,2] = intercep_indicators[2] + lam[1]*FS_Ksi[i];
    indicator_pred[i,3] = intercep_indicators[3] + lam[2]*FS_Ksi[i];
    
    //eta1 = M
    indicator_pred[i,4] = intercep_indicators[4] + FS_ETA[i,1];
    indicator_pred[i,5] = intercep_indicators[5] + lam[3]*FS_ETA[i,1];
    indicator_pred[i,6] = intercep_indicators[6] + lam[4]*FS_ETA[i,1];
    
    //eta2 = Y
    indicator_pred[i,7] = intercep_indicators[7] + FS_ETA[i,2];
    indicator_pred[i,8] = intercep_indicators[8] + lam[5]*FS_ETA[i,2];
    indicator_pred[i,9] = intercep_indicators[9] + lam[6]*FS_ETA[i,2];

  }

//structural paths
  for(i in 1:N){
    
    //beta1 = a
    //beta2 = b
    //beta3 = cp
    
    
    eta1_pred[i] = beta[1]*FS_Ksi[i];
    eta2_pred[i] = beta[2]*FS_ETA[i,1] + beta[3]*FS_Ksi[i] ;

 
  }
}
  
model{
  
  // priors on intercepts of the indicators
  for (i in 1:P) {intercep_indicators[i] ~ normal(0, 10);}
 
 
  // priors on factor loadings
  for (i in 1:(P-D)) {lam[i] ~ normal(0, 1);}

 
  // priors on regression coefficients
  for (i in 1:D) {beta[i] ~ normal(0, 2);}
 
  // priors on SD
  for(i in 1:P) {sd_indicators[i] ~ gamma(1, 0.5);}
  
  sd_ksi ~ gamma(1, 0.5);
  sd_eta1 ~ gamma(1, 0.5);
  sd_eta2 ~ gamma(1, 0.5);
  
  



  // likelihood
  for(i in 1:N){

    FS_Ksi[i] ~ normal(0, sd_ksi);
    
    FS_ETA[i,1]  ~  normal(eta1_pred[i], sd_eta1);
    FS_ETA[i,2]  ~  normal(eta2_pred[i], sd_eta2);
    
    
      target +=  normal_lpdf(Y[N, 1] | indicator_pred[i,1] , sd_indicators[1]);
      target +=  normal_lpdf(Y[N, 2] | indicator_pred[i,2] , sd_indicators[2]);
      target +=  normal_lpdf(Y[N, 3] | indicator_pred[i,3] , sd_indicators[3]);
      target +=  normal_lpdf(Y[N, 4] | indicator_pred[i,4] , sd_indicators[4]);
      target +=  normal_lpdf(Y[N, 5] | indicator_pred[i,5] , sd_indicators[5]);
      target +=  normal_lpdf(Y[N, 6] | indicator_pred[i,6] , sd_indicators[6]);
      target +=  normal_lpdf(Y[N, 7] | indicator_pred[i,7] , sd_indicators[7]);
      target +=  normal_lpdf(Y[N, 8] | indicator_pred[i,8] , sd_indicators[8]);
      target +=  normal_lpdf(Y[N, 9] | indicator_pred[i,9] , sd_indicators[9]);
      target +=  normal_lpdf(FS_ETA[i,1]| eta1_pred[i], sd_eta1);
      target +=  normal_lpdf(FS_ETA[i,2] | eta2_pred[i], sd_eta2);
        
      }
      
 
  
}
 
generated quantities{
  real med_eff = beta[1]*beta[2];
}
 

r code to generate the data

set.seed(121212)

ksi <- rnorm(200) # mean=0, sd=1
eta <- 0.8 * ksi + rnorm(200, 0, sqrt(1-(0.7^2 * var(ksi))));
eta2 <- 0.9 * ksi + 0.7 * eta +rnorm(200, 0, sqrt(1-(0.1^2 * var(ksi))));

x1 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x2 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x3 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
 
y1 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y2 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y3 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));

m1 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m2 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m3 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));




# Saving all variables in a data set 

data4med <- as.data.frame(cbind(x1, x2, x3, y1, y2, y3, m1, m2, m3))


r code to run the stan code on the generated data


Med_list<- list(
  N = nrow(data4med), # 200number of persons
  P = 9, # 9 number of items
  Y = as.matrix(data4med), # data matrix Y with N rows and P columns
  D = 3, #total number of latent vars
  K = 1, #number of exogenous latent vars
  E = 2 #number of endogenous latent var
)

mod= stan_model(file = 'code.stan')

fit<- sampling(mod,  data = Med_list, iter=5000, cores = ncores, chains = 3, control = list(adapt_delta = 0.99999, max_treedepth=14),  init="random", warmup=1000)