No convergence - mediation with latent variables in rstan

I’m fitting a simple mediation model. The predictor, mediator, and outcome are latent and each have three indicators. Unit loading identification is used.

The 3 chains are not converging for any of the parameters.
This is my first ever stan code and I’d really appreciate any help.

My Stan code:



data{
int <lower=0> N; // sample size
int <lower=0> P; // number of variables
matrix[N, P] Y; // data matrix Y with N rows and P columns
int <lower=0> D; // number of total latent variables
int <lower=0> K; // number of explanatory latent variables (ksi)
int <lower=0> E; // number of outcome latent variables (eta)

}

parameters{
vector[P] intercep_indicators; // intercepts of observed variables
vector[P-D] lam; // factor loadings
vector[3] beta; // structural regressions
vector[N] FS_Ksi; // factor scores of explanatory (exogenous) latent variables
matrix[N,E] FS_ETA; // factor scores of endogenous vars

vector<lower=0>[P] sd_indicators; // variance for observed variables
vector<lower=0>[E] sd_eta1; 
vector<lower=0>[E] sd_eta2; 
vector<lower=0>[K] sd_ksi; // variance for exogenous var ksi
}

transformed parameters{
matrix[N, P] indicator_pred; // predicted values of observed variables - indicators
vector[N] eta1_pred; // predicted values of eta1 - M 
vector[N] eta2_pred; // added predicted values of eta2 - Y


measurment model
  for(i in 1:N){
    
    //ksi = X
    indicator_pred[i,1] = intercep_indicators[1] + FS_Ksi[i];
    indicator_pred[i,2] = intercep_indicators[2] + lam[1]*FS_Ksi[i];
    indicator_pred[i,3] = intercep_indicators[3] + lam[2]*FS_Ksi[i];
    
    //eta1 = M
    indicator_pred[i,4] = intercep_indicators[4] + FS_ETA[i,1];
    indicator_pred[i,5] = intercep_indicators[5] + lam[3]*FS_ETA[i,1];
    indicator_pred[i,6] = intercep_indicators[6] + lam[4]*FS_ETA[i,1];
    
    //eta2 = Y
    indicator_pred[i,7] = intercep_indicators[7] + FS_ETA[i,2];
    indicator_pred[i,8] = intercep_indicators[8] + lam[5]*FS_ETA[i,2];
    indicator_pred[i,9] = intercep_indicators[9] + lam[6]*FS_ETA[i,2];

  }

//structural paths
  for(i in 1:N){
    
    //beta1 = a
    //beta2 = b
    //beta3 = cp
    
    
    eta1_pred[i] = beta[1]*FS_Ksi[i];
    eta2_pred[i] = beta[2]*FS_ETA[i,1] + beta[3]*FS_Ksi[i] ;

 
  }
}
  
model{
  
  // priors on intercepts of the indicators
  for (i in 1:P) {intercep_indicators[i] ~ normal(0, 10);}
 
 
  // priors on factor loadings
  for (i in 1:(P-D)) {lam[i] ~ normal(0, 10);}

 
  // priors on regression coefficients
  for (i in 1:D) {beta[i] ~ normal(0, 10);}
 
  // priors on SD
  for(i in 1:P) {sd_indicators[i] ~ gamma(1, 0.5);}
  
  sd_ksi ~ gamma(1, 0.5);
  sd_eta1 ~ gamma(1, 0.5);
  sd_eta2 ~ gamma(1, 0.5);
  
  



  // likelihood
  for(i in 1:N){

    FS_Ksi[i] ~ normal(0, sd_ksi);
    
    FS_ETA[i,1]  ~  normal(eta1_pred[i], sd_eta1);
    FS_ETA[i,2]  ~  normal(eta2_pred[i], sd_eta2);
    
    
      target +=  normal_lpdf(Y[N, 1] | indicator_pred[i,1] , sd_indicators[1]);
      target +=  normal_lpdf(Y[N, 2] | indicator_pred[i,2] , sd_indicators[2]);
      target +=  normal_lpdf(Y[N, 3] | indicator_pred[i,3] , sd_indicators[3]);
      target +=  normal_lpdf(Y[N, 4] | indicator_pred[i,4] , sd_indicators[4]);
      target +=  normal_lpdf(Y[N, 5] | indicator_pred[i,5] , sd_indicators[5]);
      target +=  normal_lpdf(Y[N, 6] | indicator_pred[i,6] , sd_indicators[6]);
      target +=  normal_lpdf(Y[N, 7] | indicator_pred[i,7] , sd_indicators[7]);
      target +=  normal_lpdf(Y[N, 8] | indicator_pred[i,8] , sd_indicators[8]);
      target +=  normal_lpdf(Y[N, 9] | indicator_pred[i,9] , sd_indicators[9]);
      target +=  normal_lpdf(FS_ETA[i,1]| eta1_pred[i], sd_eta1);
      target +=  normal_lpdf(FS_ETA[i,2] | eta2_pred[i], sd_eta2);
        
      }
      
 
  
}
 
generated quantities{
  real med_eff = beta[1]*beta[2];
}
 

My r code to run the stan code



// simulate data
set.seed(121212)

ksi <- rnorm(200)# mean=0, sd=1
eta <- 0.8 * ksi + rnorm(200, 0, sqrt(1-(0.7^2 * var(ksi))));
eta2 <- 0.9 * ksi + 0.7 * eta +rnorm(200, 0, sqrt(1-(0.1^2 * var(ksi))));

x1 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x2 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x3 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
 
y1 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y2 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y3 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));

m1 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m2 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m3 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));


dataMedPop1b <- as.data.frame(cbind(x1, x2, x3, y1, y2, y3, m1, m2, m3))


Med_list<- list(
  N = nrow(dataMedPop1b), # 200number of persons
  P = 9, # 9number of items
  Y = as.matrix(dataMedPop1b), # data matrix Y with N rows and P columns
  D = 3,
  K = 1,
  E = 2
)

mod = stan_model(file = 'code.stan')
fit <- sampling(mod,  data = Med_list, iter=5000, cores = ncores, chains = 3, control = list(adapt_delta = 0.99999, max_treedepth=15),  init="random", warmup=1000)

You’re using Med_list, but above you’ve defined dataMedPop1b?

Please also format your code with backtics (e.g., I think you can pick the “</>” symbol in the editor, and place your R code and Stan code, respectively, within such formatting).

Hi, yes med_list is the list input that stan uses. Here’s the reformatted code:

stan code


data{
int <lower=0> N; // sample size
int <lower=0> P; // number of variables
matrix[N, P] Y; // data matrix Y with N rows and P columns
int <lower=0> D; // number of total latent variables
int <lower=0> K; // number of explanatory latent variables (ksi)
int <lower=0> E; // number of outcome latent variables (eta)

}

parameters{
vector[P] intercep_indicators; // intercepts of observed variables
vector[P-D] lam; // factor loadings
vector[3] beta; // structural regressions
vector[N] FS_Ksi; // factor scores of explanatory (exogenous) latent variables
matrix[N,E] FS_ETA; // factor scores of endogenous vars

vector<lower=0>[P] sd_indicators; // variance for observed variables
vector<lower=0>[E] sd_eta1; 
vector<lower=0>[E] sd_eta2; 
vector<lower=0>[K] sd_ksi; // variance for exogenous var ksi
}

transformed parameters{
matrix[N, P] indicator_pred; // predicted values of observed variables - indicators
vector[N] eta1_pred; // predicted values of eta1 - M 
vector[N] eta2_pred; // added predicted values of eta2 - Y


//measurment model
  for(i in 1:N){
    
    //ksi = X
    indicator_pred[i,1] = intercep_indicators[1] + FS_Ksi[i];
    indicator_pred[i,2] = intercep_indicators[2] + lam[1]*FS_Ksi[i];
    indicator_pred[i,3] = intercep_indicators[3] + lam[2]*FS_Ksi[i];
    
    //eta1 = M
    indicator_pred[i,4] = intercep_indicators[4] + FS_ETA[i,1];
    indicator_pred[i,5] = intercep_indicators[5] + lam[3]*FS_ETA[i,1];
    indicator_pred[i,6] = intercep_indicators[6] + lam[4]*FS_ETA[i,1];
    
    //eta2 = Y
    indicator_pred[i,7] = intercep_indicators[7] + FS_ETA[i,2];
    indicator_pred[i,8] = intercep_indicators[8] + lam[5]*FS_ETA[i,2];
    indicator_pred[i,9] = intercep_indicators[9] + lam[6]*FS_ETA[i,2];

  }

//structural paths
  for(i in 1:N){
    
    //beta1 = a
    //beta2 = b
    //beta3 = cp
    
    
    eta1_pred[i] = beta[1]*FS_Ksi[i];
    eta2_pred[i] = beta[2]*FS_ETA[i,1] + beta[3]*FS_Ksi[i] ;

 
  }
}
  
model{
  
  // priors on intercepts of the indicators
  for (i in 1:P) {intercep_indicators[i] ~ normal(0, 10);}
 
 
  // priors on factor loadings
  for (i in 1:(P-D)) {lam[i] ~ normal(0, 1);}

 
  // priors on regression coefficients
  for (i in 1:D) {beta[i] ~ normal(0, 2);}
 
  // priors on SD
  for(i in 1:P) {sd_indicators[i] ~ gamma(1, 0.5);}
  
  sd_ksi ~ gamma(1, 0.5);
  sd_eta1 ~ gamma(1, 0.5);
  sd_eta2 ~ gamma(1, 0.5);
  
  



  // likelihood
  for(i in 1:N){

    FS_Ksi[i] ~ normal(0, sd_ksi);
    
    FS_ETA[i,1]  ~  normal(eta1_pred[i], sd_eta1);
    FS_ETA[i,2]  ~  normal(eta2_pred[i], sd_eta2);
    
    
      target +=  normal_lpdf(Y[N, 1] | indicator_pred[i,1] , sd_indicators[1]);
      target +=  normal_lpdf(Y[N, 2] | indicator_pred[i,2] , sd_indicators[2]);
      target +=  normal_lpdf(Y[N, 3] | indicator_pred[i,3] , sd_indicators[3]);
      target +=  normal_lpdf(Y[N, 4] | indicator_pred[i,4] , sd_indicators[4]);
      target +=  normal_lpdf(Y[N, 5] | indicator_pred[i,5] , sd_indicators[5]);
      target +=  normal_lpdf(Y[N, 6] | indicator_pred[i,6] , sd_indicators[6]);
      target +=  normal_lpdf(Y[N, 7] | indicator_pred[i,7] , sd_indicators[7]);
      target +=  normal_lpdf(Y[N, 8] | indicator_pred[i,8] , sd_indicators[8]);
      target +=  normal_lpdf(Y[N, 9] | indicator_pred[i,9] , sd_indicators[9]);
      target +=  normal_lpdf(FS_ETA[i,1]| eta1_pred[i], sd_eta1);
      target +=  normal_lpdf(FS_ETA[i,2] | eta2_pred[i], sd_eta2);
        
      }
      
 
  
}
 
generated quantities{
  real med_eff = beta[1]*beta[2];
}

r code to generate the data

set.seed(121212)

ksi <- rnorm(200) # mean=0, sd=1
eta <- 0.8 * ksi + rnorm(200, 0, sqrt(1-(0.7^2 * var(ksi))));
eta2 <- 0.9 * ksi + 0.7 * eta +rnorm(200, 0, sqrt(1-(0.1^2 * var(ksi))));

x1 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x2 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x3 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
 
y1 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y2 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y3 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));

m1 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m2 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m3 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));




# Saving all variables in a data set 

dataMedPop1b <- as.data.frame(cbind(x1, x2, x3, y1, y2, y3, m1, m2, m3))

r code to run the stan code on the generated data

Med_list<- list(
  N = nrow(dataMedPop1b), # 200number of persons
  P = 9, # 9 number of items
  Y = as.matrix(dataMedPop1b), # data matrix Y with N rows and P columns
  D = 3, #total number of latent vars
  K = 1, #number of exogenous latent vars
  E = 2 #number of endogenous latent var
)

mod= stan_model(file = 'code.stan')

fit<- sampling(mod,  data = Med_list, iter=5000, cores = ncores, chains = 3, control = list(adapt_delta = 0.99999, max_treedepth=14),  init="random", warmup=1000)