Model convergence issue

//Joint LC/RH model for fertility and surival
data {
  int<lower=1> Nm; // number of mortality observation
  int<lower=1> Nf; // number of fertility observation
  int<lower=1> T; // years
  int<lower=1> Am; // ages for mortality
  int<lower=1> Af; // ages for fertility
  int<lower=1> Cm; // cohort index for mortality
  int<lower=1> Cf; // cohort index for fertility

  array[Nm] int<lower=1,upper=Am> am; // age index for mortality
  array[Nf] int<lower=1,upper=Af> af; // age index for fertility
  array[Nm] int<lower=1,upper=T> ti; // time index
  array[Nm] int<lower=1,upper=Cm> cm; // cohort index for mortality
  array[Nf] int<lower=1,upper=Cf> cf; // cohort index for fertility

  vector[Nm] ym; // mortality rates
  vector[Nf] yf; // fertility rates
}
parameters {

  vector[Am] alpha_m; // age-specific intercept
  vector[Af] alpha_f;

  simplex[Am] beta_m; // coefficient for temporal effect
  simplex[Af] beta_f;
  simplex[Am] beta_m_g; // coefficient for cohort effect
  simplex[Af] beta_f_g;

  array[T] vector[2] kappa; // temporal factor

  vector[Cm] gamma_m; // cohort effect
  vector[Cf] gamma_f;

  real<lower=0> sigma_m; // variances
  real<lower=0> sigma_f;
  real<lower=0> sigma_alpha_m;
  real<lower=0> sigma_alpha_f;
  real<lower=0> sigma_gamma_m;
  real<lower=0> sigma_gamma_f;

  vector[2] phi0; // VAR(1) intercept
  matrix[2,2] phi; // vAR(1) coefficient matrix
  vector<lower=0>[2] sigma_k; // variance
  cholesky_factor_corr[2] Lcorr_k; // corrrelation
}
transformed parameters {
  matrix[2,2] L_sigma_k = diag_pre_multiply(sigma_k, Lcorr_k);
}
model {

  // variances
  target += student_t_lpdf(sigma_m|5,0,1);
  target += student_t_lpdf(sigma_f|5,0,1);
  target += student_t_lpdf(sigma_alpha_m|5,0,1);
  target += student_t_lpdf(sigma_alpha_f|5,0,1);
  target += student_t_lpdf(sigma_gamma_m|5,0,1);
  target += student_t_lpdf(sigma_gamma_f|5,0,1);

  // VAR(1) parameters
  target += normal_lpdf(phi0|0,1);
  target += normal_lpdf(to_vector(phi)|0,0.5);
  target += student_t_lpdf(sigma_k|5,0,1);
  target += lkj_corr_cholesky_lpdf(Lcorr_k|2);

  // Dirichlet prior for beta
  target += dirichlet_lpdf(beta_m|rep_vector(1, Am));
  target += dirichlet_lpdf(beta_f|rep_vector(1, Af));
  target += dirichlet_lpdf(beta_m_g|rep_vector(1, Am));
  target += dirichlet_lpdf(beta_f_g|rep_vector(1, Af));

  // age effect
  target += normal_lpdf(alpha_m[1:2]|0, sigma_alpha_m);
  for(a in 3:Am){
    target += normal_lpdf(alpha_m[a]|2alpha_m[a-1]-alpha_m[a-2], sigma_alpha_m);
  }
  target += normal_lpdf(alpha_f[1:2]|0,sigma_alpha_f);
  for(a in 3:Af){
    target += normal_lpdf(alpha_f[a]|2alpha_f[a-1]-alpha_f[a-2], sigma_alpha_f);
  }

  // cohort effect
  target += normal_lpdf(gamma_m|0, sigma_gamma_m);
  target += normal_lpdf(gamma_f|0, sigma_gamma_f);

  // VAR(1) temporal effect
  target += multi_normal_cholesky_lpdf(kappa[1]|phi0, L_sigma_k);
  for(t in 2:T){
    target += multi_normal_cholesky_lpdf(kappa[t]|phi0+phi*kappa[t-1], L_sigma_k);
  }

  // soft sum-zero constraints
  sum(gamma_m) ~ normal(0, 0.001Cm);
  sum(gamma_f) ~ normal(0, 0.001Cf);

  sum(kappa[,1]) ~ normal(0, 0.001T);
  sum(kappa[,2]) ~ normal(0, 0.001T);

  // likelihood
  for(n in 1:Nm){
    real eta = alpha_m[am[n]] + beta_m[am[n]] * kappa[ti[n]][1] + beta_m_g[am[n]]*gamma_m[cm[n]];
    target += normal_lpdf(ym[n]|eta, sigma_m);
  }
  for(n in 1:Nf){
    real eta = alpha_f[af[n]] + beta_f[af[n]] * kappa[ti[n]][2] + beta_f_g[af[n]]*gamma_f[cf[n]];
    target += normal_lpdf(yf[n]|eta, sigma_f);
  }
}

I am trying fit this model but I am getting convergence issues. especially parameters for kappa and gamma are showing high rhat and low ess.

I would suggest declaring them as sum_to_zero_vector (this won’t work in RStan because it’s too far behind Stan releases). This is much more robust than using soft-sum-to-zero as you have here.

But I don’t see how your Stan code could have even compiled looking like this:

sum(gamma_m) ~ normal(0, 0.001<em>Cm);
sum(gamma_f) ~ normal(0, 0.001</em>Cf);

sum(kappa[,1]) ~ normal(0, 0.001<em>T);
sum(kappa[,2]) ~ normal(0, 0.001</em>T);

P.S. Markdown supports Stan code blocks, so I escaped yours and also added standard indentation to make it easier to read.

You might also try tighter priors. Those Student-t priors are both wide tailed and consistent with zero. Something like lognormal might work more stably.

You can also vectorize all the distributions. For example, this

 for(n in 1:Nm){
    real eta = alpha_m[am[n]] + beta_m[am[n]] * kappa[ti[n]][1] + beta_m_g[am[n]]*gamma_m[cm[n]];
    target += normal_lpdf(ym[n]|eta, sigma_m);
  }

can be written more concisely and efficiently as:

vector[Nm] eta = alpha_m[am] + beta_m[am} + kappa[ti,1] + beta_m_g[am] .* gamma_m[cm];
ym ~ normal(eta, sigma_m);

Or if you don’t like the distribution notation, use target += normal_lupdf(...) to drop the normalizing constant as your’e not going to need it here for sampling.

Thank you so much for the response. I have modified the model as given below. I tried the lognormal prior instead of the Student-t prior for the sigmas as well. However, I do not achieve much in terms of R-hat. I would be grateful for any more suggestions that can improve the model.
//Joint LC/RH model for fertility and mortality
data {
int<lower=1> Nm; // number of observations
int<lower=1> Nf;
int<lower=1> T; // years
int<lower=1> Am; // ages
int<lower=1> Af;
int<lower=1> Cm; // cohorts
int<lower=1> Cf;

array[Nm] int<lower=1,upper=Am> am; // age index
array[Nf] int<lower=1,upper=Af> af;
array[Nm] int<lower=1,upper=T> tm; // time index
array[Nf] int<lower=1,upper=T> tf;
array[Nm] int<lower=1,upper=Cm> cm; // cohort index
array[Nf] int<lower=1,upper=Cf> cf;

vector[Nm] mx; // mortality rates
vector[Nf] fx; // fertility rates
}
transformed data{
vector[Nm] ym;
vector[Nf] yf;
ym = log(fmax(mx, 1e-9));
yf = log(fmax(fx, 1e-9));
}
parameters {

vector[Am] alpha_m; // age-specific intercept
vector[Af] alpha_f;

simplex[Am] beta_m; // coefficient for temporal effect
simplex[Af] beta_f;
simplex[Am] beta_m_g; // coefficient for cohort effect
simplex[Af] beta_f_g;

sum_to_zero_vector[T] kappa_m; // temporal factor
sum_to_zero_vector[T] kappa_f;

sum_to_zero_vector[Cm] gamma_m; // cohort effect
sum_to_zero_vector[Cf] gamma_f;

real<lower=0> sigma_m; // variances
real<lower=0> sigma_f;
real<lower=0> sigma_alpha_m;
real<lower=0> sigma_alpha_f;
real<lower=0> sigma_gamma_m;
real<lower=0> sigma_gamma_f;

vector[2] phi0; // VAR(1) drift
matrix<lower=0,upper=1>[2,2] phi; // vAR(1) coefficient matrix
vector<lower=0>[2] sigma_k; // variance
cholesky_factor_corr[2] Lcorr_k; // corrrelation
}
transformed parameters {
matrix[2,2] L_sigma_k = diag_pre_multiply(sigma_k, Lcorr_k);
array[T] vector[2] kappa;
for(t in 1:T){
kappa[t][1] = kappa_m[t];
kappa[t][2] = kappa_f[t];
}
}
model {

// variances
target += student_t_lpdf(sigma_m|5,0,1);
target += student_t_lpdf(sigma_f|5,0,1);
target += student_t_lpdf(sigma_alpha_m|5,0,1);
target += student_t_lpdf(sigma_alpha_f|5,0,1);
target += student_t_lpdf(sigma_gamma_m|5,0,1);
target += student_t_lpdf(sigma_gamma_f|5,0,1);
target += student_t_lpdf(sigma_k|5,0,1);

// VAR(1) parameters
target += normal_lpdf(phi0|0,1);
target += beta_lpdf(to_vector(phi)|1,1);
target += lkj_corr_cholesky_lpdf(Lcorr_k|2);

// Dirichlet prior for beta
target += dirichlet_lpdf(beta_m|rep_vector(1, Am));
target += dirichlet_lpdf(beta_f|rep_vector(1, Af));
target += dirichlet_lpdf(beta_m_g|rep_vector(1, Am));
target += dirichlet_lpdf(beta_f_g|rep_vector(1, Af));

// age effect
target += normal_lpdf(alpha_m[1:2]|0, sigma_alpha_m);
target += normal_lpdf(alpha_m[3:Am]|2alpha_m[2:(Am-1)]-alpha_m[1:(Am-2)], sigma_alpha_m);
target += normal_lpdf(alpha_f[1:2]|0,sigma_alpha_f);
target += normal_lpdf(alpha_f[3:Af]|2
alpha_f[2:(Af-1)]-alpha_f[1:(Af-2)], sigma_alpha_f);

// cohort effect
target += normal_lpdf(gamma_m[1]|0, sigma_gamma_m);
target += normal_lpdf(gamma_m[2:Cm]|gamma_m[1:(Cm-1)], sigma_gamma_m);
target += normal_lpdf(gamma_f[1]|0, sigma_gamma_f);
target += normal_lpdf(gamma_f[2:Cf]|gamma_f[1:(Cf-1)], sigma_gamma_f);

// VAR(1) temporal effect
target += multi_normal_cholesky_lpdf(kappa[1]|phi0, L_sigma_k);
for(t in 2:T){
target += multi_normal_cholesky_lpdf(kappa[t]|phi0+phi*kappa[t-1], L_sigma_k);
}

// likelihood
vector[Nm] eta_m = alpha_m[am] + beta_m[am] .* kappa_m[tm] + beta_m_g[am] .* gamma_m[cm];
target += normal_lupdf(ym| eta_m, sigma_m);
vector[Nf] eta_f = alpha_f[af] + beta_f[af] .* kappa_f[tf] + beta_f_g[af] .* gamma_f[cf];
target += normal_lupdf(yf| eta_f, sigma_f);
}