Information criteria for multivariate models

Hi!

Here is the stan code for a model including covariates. We already discussed these kinds of model (close to bayesian factor analysis) here.

data{
  int N; // sample size
  int P; // number of species
  int K; //Number of predictors
  int D; // number of dimensions
  matrix[N,K] X; //Predictor matrix
  int<lower=0> Y[N,P]; // data matrix of order [N,P]
}
transformed data{
  int<lower=1> M;
  vector[D] FS_mu; // factor means
  vector<lower=0>[D] FS_sd; // factor standard deviations
  M = D*(P-D)+ D*(D-1)/2;  // number of lower-triangular, non-zero loadings
  for (m in 1:D) {
    FS_mu[m] = 0; //Mean of factors = 0
    FS_sd[m] = 1; //Sd of factors = 1
  }
}
parameters{
  //Parameters
  vector[N] alpha; //Row intercepts
  row_vector[P] b0; // Intercept per species
  matrix[K,P] b; //Coefficient per species
  vector[M] L_lower; //Lower diagonal loadings
  vector<lower=0>[D] L_diag; //Positive diagonal elements of loadings
  matrix[N,D] FS; //Factor scores, matrix of order [N,D]
  cholesky_factor_corr[D] Rho; //Correlation matrix between factors
  //Hyperparameters
  real<lower=0> sigma_a; //Variance of the row intercepts
  real<lower=0> sigma_b0; //Variance of the species intercepts
  vector<lower=0>[K] sigma_b; //Variance of the species slopes
  //real mu_lower; //Mean of lower-diag loadings
  //real<lower=0> sigma_lower; //Sd of lower-diag loadings
  real<lower=0> eta; //Parameter of LKJ prior for Rho
}
transformed parameters{
  cholesky_factor_cov[P,D] lambda; //Final matrix of laodings
  matrix[D,D] Ld; // cholesky decomposition of the covariance matrix between factors
  matrix[N,P] temp; //intermediate predictor
  matrix<lower=0>[N,P] mu; // predicted values
  
  // Correlation matrix of factors
  Ld = diag_pre_multiply(FS_sd, Rho);
  
  {
    int idx2; //Index for the lower diagonal loadings
    idx2 = 0;
    
    // Constraints to allow identifiability of loadings
  	 for(i in 1:(D-1)) { for(j in (i+1):(D)){ lambda[i,j] = 0; } } //0 on upper diagonal
  	 for(i in 1:D) lambda[i,i] = L_diag[i]; //Positive values on diagonal
  	 for(j in 1:D) {
  	   for(i in (j+1):P) {
  	     idx2 = idx2+1;
  	     lambda[i,j] = L_lower[idx2];
  	   }
  	 }
  }
  
  // Predictor
  temp = FS * lambda' + X * b;
  for(n in 1:N) mu[n] = exp(alpha[n] + b0 + temp[n]);
  
}
model{
  // Hyperpriors
  sigma_a ~ gamma(2,0.1); //Row intercepts hyperprior
  sigma_b0 ~ gamma(2,0.1); //Species intercept effects hyperprior
  for(k in 1:K) sigma_b[k] ~ gamma(2,0.1); //Species slopes hyperprior
  //mu_lower ~ normal(0,0.1);//Mean of lower loadings
  //sigma_lower ~ gamma(2,0.1); //Variance of lower loadings
  eta ~ gamma(2,0.1); //Parameter of the cholesky prior for FS correlation structure
  
  // Priors
  alpha ~ normal(0, sigma_a); //Regularizing prior for row intercepts
  b0 ~ normal(0, sigma_b0); //Regularizing prior for species intercepts
  for(k in 1:K) b[k,] ~ normal(0, sigma_b[k]); //Regularizing prior for species slopes
  //L_diag ~ normal(0,1); //Prior for diagonal elements
  for(d in 1:D) L_diag[d] ~ chi_square(N-d);
  L_lower ~ normal(0,1); //Prior for lower loadings
  Rho ~ lkj_corr_cholesky(eta); //Regularizing prior for Rho
  
  for(i in 1:N){	
    Y[i,] ~ poisson(mu[i]);
    FS[i,] ~ multi_normal_cholesky(FS_mu, Ld);
  }
}
generated quantities{
  matrix[P,P] cov_lambda;
  
  cov_lambda = multiply_lower_tri_self_transpose(lambda);
}

Thanks for your help!