Sending 3d array from R to Stan

I am working with a scenario where I am simulating a mediation model with various numbers of latent mediators. I am trying to make my stan code able to run regardless of the number of mediators. Ok so here is where I am at. I am simulating my data in R using the package lavaan with the following code without any problems:

  pop_model = paste('
# Measurement model for the latent mediator
     M1 =~ ', lambda[1], '*I1.1 + 
           ', lambda[2], '*I1.2 +
           ', lambda[3], '*I1.3 
           
     M2 =~ ', lambda[1], '*I2.1 + 
           ', lambda[2], '*I2.2 +
           ', lambda[3], '*I2.3 
     
     M3 =~ ', lambda[1], '*I3.1 + 
           ', lambda[2], '*I3.2 +
           ', lambda[3], '*I3.3 
                           
# Regressions 
  M1 ~ ',beta[1],'*X
  M2 ~ ',beta[2],'*X
  M3 ~ ',beta[3],'*X

  Y ~ ',beta[6],'*X + 
     ',beta[1],'*M1 + 
     ',beta[2],'*M2 +
     ',beta[3],'*M3 

')

Now I have gotten Stan to work just fine if I send the collection of indicators for the mediators one at a time. with the following R code:

 I1 <- array(dim = c(N,K))
  I2 <- array(dim = c(N,K))
  I3 <- array(dim = c(N,K))

  for(k in 1:K){
      I1[ ,k] <- med_data[,k]
      I2[ ,k] <- med_data[,k+K]
      I3[ ,k] <- med_data[,k+K*2]
  }
......
stan_data <- list(N = N, J = J, K = K, X = med_data$X_c, Y = med_data$Y_c,
                    I1 = I1,
                    I2 = I2,
                    I3 = I3
                    )

and the code in stan that works to interpret this pass is as follows:

data {
  int<lower=1> N;            // Number of observations
  int<lower=1> K;            // Number of indicators per mediator (assuming 5 for both M1 and M2)
  
  matrix[N, K] I1;           // Indicator data for mediator 1
  matrix[N, K] I2;           // Indicator data for mediator 2
  matrix[N, K] I3;           // Indicator data for mediator 3
  vector[N] X;               // Independent variable
  vector[N] Y;               // Dependent variable
........
for (k in 1:K) {
    if (k == 1) {
          I1[, k] ~ normal(alpha1[k] + 1 * eta1, sigma_I1[k]);
          I2[, k] ~ normal(alpha2[k] + 1 * eta2, sigma_I2[k]); 
          I3[, k] ~ normal(alpha3[k] + 1 * eta3, sigma_I3[k]);
    } else {
          I1[, k] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
          I2[, k] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
          I3[, k] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
    }  
  }
}

Where I run into issues is when I try to generalize this setup. I want to be able to pass instead of 3 explicitly stated set of indicators I want to send a group of J mediators. This is what I have tried in R to pass this 3d object:

I1 <- array(dim = c(N,K))
  I2 <- array(dim = c(N,K))
  I3 <- array(dim = c(N,K))

  for(k in 1:K){
      I1[ ,k] <- med_data[,k]
      I2[ ,k] <- med_data[,k+K]
      I3[ ,k] <- med_data[,k+K*2]
  }
  
  I <- array(dim = c(N,K,J))
  for(j in 1:J){
    for(k in 1:K){
      I[ ,k ,j] <- med_data[,K*(j-1)+k]
    }
  }
  
  I_list = lapply(1:dim(I)[3], function(j) { I[,,j]})
  
  med_data$X_c = med_data$X - mean(med_data$X)
  med_data$Y_c = med_data$Y - mean(med_data$Y)
  
  stan_data <- list(N = N, J = J, K = K, X = med_data$X_c, Y = med_data$Y_c,
                    I = I_list
                    )

and here is how I have tried to read this and call it in stan:

data {
  int<lower=1> N;            // Number of observations
  int<lower=1> K;            // Number of indicators per mediator (assuming 5 for both M1 and M2)
  int<lower=1> J;            // Number of mediators
  matrix[N, K] I[J];        // Indicator data for mediator 
  vector[N] X;               // Independent variable
  vector[N] Y;               // Dependent variable
}
.....
for (k in 1:K) {
    if (k == 1) {
          I[, k, 1] ~ normal(alpha1[k] + 1 * eta1, sigma_I1[k]);
          I[, k, 2] ~ normal(alpha2[k] + 1 * eta2, sigma_I2[k]); 
          I[, k, 3] ~ normal(alpha3[k] + 1 * eta3, sigma_I3[k]);
    } else {
          I[, k, 1] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
          I[, k, 2] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
          I[, k, 3] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
    }  
  }

For the life of me I can’t get this data passing to work correctly. In stan it would be great if I can have a set of J matrixes of size [N,J]. Open to any pointers.

The new syntax is this, but I’m not 100% sure where RStan’s at as it’s pretty far behind core Stan at this point:

data {
  array[J] matrix[M, N] x;
}
transformed data {
  print("x = ", x);
}

Then in R, this will create a J x M x N = 2 x 3 x 4 array that you can pass to Stan:

x <- array(1:24, c(2, 3, 4))
data = list(x = x)

There’s also an easier way to write this:

for (k in 1:K) {
    if (k == 1) {
          I[, k, 1] ~ normal(alpha1[k] + 1 * eta1, sigma_I1[k]);
          I[, k, 2] ~ normal(alpha2[k] + 1 * eta2, sigma_I2[k]); 
          I[, k, 3] ~ normal(alpha3[k] + 1 * eta3, sigma_I3[k]);
    } else {
          I[, k, 1] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
          I[, k, 2] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
          I[, k, 3] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
    }  
  }

as

I[, 1, 1] ~ normal(alpha1[1] + eta1, sigma_I1[1]);
I[, 1, 2] ~ normal(alpha2[1] + eta2, sigma_I2[1]); 
I[, 1, 3] ~ normal(alpha3[1] + eta3, sigma_I3[1]);
for (k in 2:K) {
  I[, k, 1] ~ normal(alpha1[k] + lambda1[k] * eta1, sigma_I1[k]);
  I[, k, 2] ~ normal(alpha2[k] + lambda2[k] * eta2, sigma_I2[k]);
  I[, k, 3] ~ normal(alpha3[k] + lambda3[k] * eta3, sigma_I3[k]);
}

I’d then try to collapse the alphas and `lambdas into single variables and try to group, but that’s just an efficiency issue.

This is true, but RStan on CRAN does finally have the new array syntax!