# No convergence - mediation with latent variables in rstan

I’m fitting a simple mediation model. The predictor, mediator, and outcome are latent and each have three indicators. Unit loading identification is used.

The 3 chains are not converging for any of the parameters.
This is my first ever stan code and I’d really appreciate any help.

# My Stan code:

``````

data{
int <lower=0> N; // sample size
int <lower=0> P; // number of variables
matrix[N, P] Y; // data matrix Y with N rows and P columns
int <lower=0> D; // number of total latent variables
int <lower=0> K; // number of explanatory latent variables (ksi)
int <lower=0> E; // number of outcome latent variables (eta)

}

parameters{
vector[P] intercep_indicators; // intercepts of observed variables
vector[3] beta; // structural regressions
vector[N] FS_Ksi; // factor scores of explanatory (exogenous) latent variables
matrix[N,E] FS_ETA; // factor scores of endogenous vars

vector<lower=0>[P] sd_indicators; // variance for observed variables
vector<lower=0>[E] sd_eta1;
vector<lower=0>[E] sd_eta2;
vector<lower=0>[K] sd_ksi; // variance for exogenous var ksi
}

transformed parameters{
matrix[N, P] indicator_pred; // predicted values of observed variables - indicators
vector[N] eta1_pred; // predicted values of eta1 - M
vector[N] eta2_pred; // added predicted values of eta2 - Y

measurment model
for(i in 1:N){

//ksi = X
indicator_pred[i,1] = intercep_indicators[1] + FS_Ksi[i];
indicator_pred[i,2] = intercep_indicators[2] + lam[1]*FS_Ksi[i];
indicator_pred[i,3] = intercep_indicators[3] + lam[2]*FS_Ksi[i];

//eta1 = M
indicator_pred[i,4] = intercep_indicators[4] + FS_ETA[i,1];
indicator_pred[i,5] = intercep_indicators[5] + lam[3]*FS_ETA[i,1];
indicator_pred[i,6] = intercep_indicators[6] + lam[4]*FS_ETA[i,1];

//eta2 = Y
indicator_pred[i,7] = intercep_indicators[7] + FS_ETA[i,2];
indicator_pred[i,8] = intercep_indicators[8] + lam[5]*FS_ETA[i,2];
indicator_pred[i,9] = intercep_indicators[9] + lam[6]*FS_ETA[i,2];

}

//structural paths
for(i in 1:N){

//beta1 = a
//beta2 = b
//beta3 = cp

eta1_pred[i] = beta[1]*FS_Ksi[i];
eta2_pred[i] = beta[2]*FS_ETA[i,1] + beta[3]*FS_Ksi[i] ;

}
}

model{

// priors on intercepts of the indicators
for (i in 1:P) {intercep_indicators[i] ~ normal(0, 10);}

for (i in 1:(P-D)) {lam[i] ~ normal(0, 10);}

// priors on regression coefficients
for (i in 1:D) {beta[i] ~ normal(0, 10);}

// priors on SD
for(i in 1:P) {sd_indicators[i] ~ gamma(1, 0.5);}

sd_ksi ~ gamma(1, 0.5);
sd_eta1 ~ gamma(1, 0.5);
sd_eta2 ~ gamma(1, 0.5);

// likelihood
for(i in 1:N){

FS_Ksi[i] ~ normal(0, sd_ksi);

FS_ETA[i,1]  ~  normal(eta1_pred[i], sd_eta1);
FS_ETA[i,2]  ~  normal(eta2_pred[i], sd_eta2);

target +=  normal_lpdf(Y[N, 1] | indicator_pred[i,1] , sd_indicators[1]);
target +=  normal_lpdf(Y[N, 2] | indicator_pred[i,2] , sd_indicators[2]);
target +=  normal_lpdf(Y[N, 3] | indicator_pred[i,3] , sd_indicators[3]);
target +=  normal_lpdf(Y[N, 4] | indicator_pred[i,4] , sd_indicators[4]);
target +=  normal_lpdf(Y[N, 5] | indicator_pred[i,5] , sd_indicators[5]);
target +=  normal_lpdf(Y[N, 6] | indicator_pred[i,6] , sd_indicators[6]);
target +=  normal_lpdf(Y[N, 7] | indicator_pred[i,7] , sd_indicators[7]);
target +=  normal_lpdf(Y[N, 8] | indicator_pred[i,8] , sd_indicators[8]);
target +=  normal_lpdf(Y[N, 9] | indicator_pred[i,9] , sd_indicators[9]);
target +=  normal_lpdf(FS_ETA[i,1]| eta1_pred[i], sd_eta1);
target +=  normal_lpdf(FS_ETA[i,2] | eta2_pred[i], sd_eta2);

}

}

generated quantities{
real med_eff = beta[1]*beta[2];
}

``````

# My r code to run the stan code

``````

// simulate data
set.seed(121212)

ksi <- rnorm(200)# mean=0, sd=1
eta <- 0.8 * ksi + rnorm(200, 0, sqrt(1-(0.7^2 * var(ksi))));
eta2 <- 0.9 * ksi + 0.7 * eta +rnorm(200, 0, sqrt(1-(0.1^2 * var(ksi))));

x1 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x2 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x3 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));

y1 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y2 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y3 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));

m1 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m2 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m3 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));

dataMedPop1b <- as.data.frame(cbind(x1, x2, x3, y1, y2, y3, m1, m2, m3))

Med_list<- list(
N = nrow(dataMedPop1b), # 200number of persons
P = 9, # 9number of items
Y = as.matrix(dataMedPop1b), # data matrix Y with N rows and P columns
D = 3,
K = 1,
E = 2
)

mod = stan_model(file = 'code.stan')
fit <- sampling(mod,  data = Med_list, iter=5000, cores = ncores, chains = 3, control = list(adapt_delta = 0.99999, max_treedepth=15),  init="random", warmup=1000)``````

You’re using `Med_list`, but above you’ve defined `dataMedPop1b`?

Please also format your code with backtics (e.g., I think you can pick the “</>” symbol in the editor, and place your R code and Stan code, respectively, within such formatting).

Hi, yes med_list is the list input that stan uses. Here’s the reformatted code:

# stan code

``````
data{
int <lower=0> N; // sample size
int <lower=0> P; // number of variables
matrix[N, P] Y; // data matrix Y with N rows and P columns
int <lower=0> D; // number of total latent variables
int <lower=0> K; // number of explanatory latent variables (ksi)
int <lower=0> E; // number of outcome latent variables (eta)

}

parameters{
vector[P] intercep_indicators; // intercepts of observed variables
vector[3] beta; // structural regressions
vector[N] FS_Ksi; // factor scores of explanatory (exogenous) latent variables
matrix[N,E] FS_ETA; // factor scores of endogenous vars

vector<lower=0>[P] sd_indicators; // variance for observed variables
vector<lower=0>[E] sd_eta1;
vector<lower=0>[E] sd_eta2;
vector<lower=0>[K] sd_ksi; // variance for exogenous var ksi
}

transformed parameters{
matrix[N, P] indicator_pred; // predicted values of observed variables - indicators
vector[N] eta1_pred; // predicted values of eta1 - M
vector[N] eta2_pred; // added predicted values of eta2 - Y

//measurment model
for(i in 1:N){

//ksi = X
indicator_pred[i,1] = intercep_indicators[1] + FS_Ksi[i];
indicator_pred[i,2] = intercep_indicators[2] + lam[1]*FS_Ksi[i];
indicator_pred[i,3] = intercep_indicators[3] + lam[2]*FS_Ksi[i];

//eta1 = M
indicator_pred[i,4] = intercep_indicators[4] + FS_ETA[i,1];
indicator_pred[i,5] = intercep_indicators[5] + lam[3]*FS_ETA[i,1];
indicator_pred[i,6] = intercep_indicators[6] + lam[4]*FS_ETA[i,1];

//eta2 = Y
indicator_pred[i,7] = intercep_indicators[7] + FS_ETA[i,2];
indicator_pred[i,8] = intercep_indicators[8] + lam[5]*FS_ETA[i,2];
indicator_pred[i,9] = intercep_indicators[9] + lam[6]*FS_ETA[i,2];

}

//structural paths
for(i in 1:N){

//beta1 = a
//beta2 = b
//beta3 = cp

eta1_pred[i] = beta[1]*FS_Ksi[i];
eta2_pred[i] = beta[2]*FS_ETA[i,1] + beta[3]*FS_Ksi[i] ;

}
}

model{

// priors on intercepts of the indicators
for (i in 1:P) {intercep_indicators[i] ~ normal(0, 10);}

for (i in 1:(P-D)) {lam[i] ~ normal(0, 1);}

// priors on regression coefficients
for (i in 1:D) {beta[i] ~ normal(0, 2);}

// priors on SD
for(i in 1:P) {sd_indicators[i] ~ gamma(1, 0.5);}

sd_ksi ~ gamma(1, 0.5);
sd_eta1 ~ gamma(1, 0.5);
sd_eta2 ~ gamma(1, 0.5);

// likelihood
for(i in 1:N){

FS_Ksi[i] ~ normal(0, sd_ksi);

FS_ETA[i,1]  ~  normal(eta1_pred[i], sd_eta1);
FS_ETA[i,2]  ~  normal(eta2_pred[i], sd_eta2);

target +=  normal_lpdf(Y[N, 1] | indicator_pred[i,1] , sd_indicators[1]);
target +=  normal_lpdf(Y[N, 2] | indicator_pred[i,2] , sd_indicators[2]);
target +=  normal_lpdf(Y[N, 3] | indicator_pred[i,3] , sd_indicators[3]);
target +=  normal_lpdf(Y[N, 4] | indicator_pred[i,4] , sd_indicators[4]);
target +=  normal_lpdf(Y[N, 5] | indicator_pred[i,5] , sd_indicators[5]);
target +=  normal_lpdf(Y[N, 6] | indicator_pred[i,6] , sd_indicators[6]);
target +=  normal_lpdf(Y[N, 7] | indicator_pred[i,7] , sd_indicators[7]);
target +=  normal_lpdf(Y[N, 8] | indicator_pred[i,8] , sd_indicators[8]);
target +=  normal_lpdf(Y[N, 9] | indicator_pred[i,9] , sd_indicators[9]);
target +=  normal_lpdf(FS_ETA[i,1]| eta1_pred[i], sd_eta1);
target +=  normal_lpdf(FS_ETA[i,2] | eta2_pred[i], sd_eta2);

}

}

generated quantities{
real med_eff = beta[1]*beta[2];
}
``````

# r code to generate the data

``````set.seed(121212)

ksi <- rnorm(200) # mean=0, sd=1
eta <- 0.8 * ksi + rnorm(200, 0, sqrt(1-(0.7^2 * var(ksi))));
eta2 <- 0.9 * ksi + 0.7 * eta +rnorm(200, 0, sqrt(1-(0.1^2 * var(ksi))));

x1 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x2 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));
x3 <- sqrt(.7) * ksi + rnorm(200, 0, sqrt(1-.7));

y1 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y2 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));
y3 <- sqrt(.8) * eta + rnorm(200, 0, sqrt(1-.8));

m1 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m2 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));
m3 <- sqrt(.85) * eta2 + rnorm(200, 0, sqrt(1-.85));

# Saving all variables in a data set

dataMedPop1b <- as.data.frame(cbind(x1, x2, x3, y1, y2, y3, m1, m2, m3))

``````

# r code to run the stan code on the generated data

``````Med_list<- list(
N = nrow(dataMedPop1b), # 200number of persons
P = 9, # 9 number of items
Y = as.matrix(dataMedPop1b), # data matrix Y with N rows and P columns
D = 3, #total number of latent vars
K = 1, #number of exogenous latent vars
E = 2 #number of endogenous latent var
)

mod= stan_model(file = 'code.stan')

fit<- sampling(mod,  data = Med_list, iter=5000, cores = ncores, chains = 3, control = list(adapt_delta = 0.99999, max_treedepth=14),  init="random", warmup=1000)
``````