I am working with a scenario where I am simulating a mediation model with various numbers of latent mediators. I am trying to make my stan code able to run regardless of the number of mediators. Ok so here is where I am at. I am simulating my data in R using the package lavaan with the following code without any problems:
pop_model = paste('
# Measurement model for the latent mediator
M1 =~ ', lambda[1], '*I1.1 +
', lambda[2], '*I1.2 +
', lambda[3], '*I1.3
M2 =~ ', lambda[1], '*I2.1 +
', lambda[2], '*I2.2 +
', lambda[3], '*I2.3
M3 =~ ', lambda[1], '*I3.1 +
', lambda[2], '*I3.2 +
', lambda[3], '*I3.3
# Regressions
M1 ~ ',beta[1],'*X
M2 ~ ',beta[2],'*X
M3 ~ ',beta[3],'*X
Y ~ ',beta[6],'*X +
',beta[1],'*M1 +
',beta[2],'*M2 +
',beta[3],'*M3
')
Now I have gotten Stan to work just fine if I send the collection of indicators for the mediators one at a time. with the following R code:
I1 <- array(dim = c(N,K))
I2 <- array(dim = c(N,K))
I3 <- array(dim = c(N,K))
for(k in 1:K){
I1[ ,k] <- med_data[,k]
I2[ ,k] <- med_data[,k+K]
I3[ ,k] <- med_data[,k+K*2]
}
......
stan_data <- list(N = N, J = J, K = K, X = med_data$X_c, Y = med_data$Y_c,
I1 = I1,
I2 = I2,
I3 = I3
)
and the code in stan that works to interpret this pass is as follows:
data {
int<lower=1> N; // Number of observations
int<lower=1> K; // Number of indicators per mediator (assuming 5 for both M1 and M2)
matrix[N, K] I1; // Indicator data for mediator 1
matrix[N, K] I2; // Indicator data for mediator 2
matrix[N, K] I3; // Indicator data for mediator 3
vector[N] X; // Independent variable
vector[N] Y; // Dependent variable
........
for (k in 1:K) {
if (k == 1) {
I1[, k] ~ normal(alpha1[k] + 1 * eta1, sigma_I1[k]);
I2[, k] ~ normal(alpha2[k] + 1 * eta2, sigma_I2[k]);
I3[, k] ~ normal(alpha3[k] + 1 * eta3, sigma_I3[k]);
} else {
I1[, k] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
I2[, k] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
I3[, k] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
}
}
}
Where I run into issues is when I try to generalize this setup. I want to be able to pass instead of 3 explicitly stated set of indicators I want to send a group of J mediators. This is what I have tried in R to pass this 3d object:
I1 <- array(dim = c(N,K))
I2 <- array(dim = c(N,K))
I3 <- array(dim = c(N,K))
for(k in 1:K){
I1[ ,k] <- med_data[,k]
I2[ ,k] <- med_data[,k+K]
I3[ ,k] <- med_data[,k+K*2]
}
I <- array(dim = c(N,K,J))
for(j in 1:J){
for(k in 1:K){
I[ ,k ,j] <- med_data[,K*(j-1)+k]
}
}
I_list = lapply(1:dim(I)[3], function(j) { I[,,j]})
med_data$X_c = med_data$X - mean(med_data$X)
med_data$Y_c = med_data$Y - mean(med_data$Y)
stan_data <- list(N = N, J = J, K = K, X = med_data$X_c, Y = med_data$Y_c,
I = I_list
)
and here is how I have tried to read this and call it in stan:
data {
int<lower=1> N; // Number of observations
int<lower=1> K; // Number of indicators per mediator (assuming 5 for both M1 and M2)
int<lower=1> J; // Number of mediators
matrix[N, K] I[J]; // Indicator data for mediator
vector[N] X; // Independent variable
vector[N] Y; // Dependent variable
}
.....
for (k in 1:K) {
if (k == 1) {
I[, k, 1] ~ normal(alpha1[k] + 1 * eta1, sigma_I1[k]);
I[, k, 2] ~ normal(alpha2[k] + 1 * eta2, sigma_I2[k]);
I[, k, 3] ~ normal(alpha3[k] + 1 * eta3, sigma_I3[k]);
} else {
I[, k, 1] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
I[, k, 2] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
I[, k, 3] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
}
}
For the life of me I can’t get this data passing to work correctly. In stan it would be great if I can have a set of J matrixes of size [N,J]. Open to any pointers.