Hi

I am working with latent variable modeling (SEM, IRT) with data augmentation for the latent variables. I can save the log-likelihood based on the model to estimate LOO, WAIC.

The issue I have come to is that for latent variable modeling the log-likelihood should marginalize out the factor scores from data augmentation. Some programs do this calculation outside Stan or JAGS. I would like to calculate this marginal model log-likelihood in Stan so I can use in a general format.

Here is an example of a Confirmatory Factor Analysis Stan code with how I am calculating the conditional likelihood ( log_lik[i] = normal_lpdf(X[i] | mu[i],var_p);), is there a way to calculate the marginal likelihood by marginalizing out the factor scores (FS)?

Thank you

cfa_stan_fv <- "

data{

int N; // sample size

int P; // number of variables

int D; // number of dimensions

vector[P] X[N]; // data matrix of order [N,P]

}

parameters{

vector[P] b; // intercepts

vector<lower=0>[P] lam; // factor loadings

vector[D] FS[N]; // factor scores, matrix of order [N,D]

cholesky_factor_corr[D] L_corr_d; // cholesky correlation between factors

vector<lower=0>[P] var_p; // residual variance for each variable

}

transformed parameters{

vector[D] M; // factor means

vector<lower=0>[D] Sd_d; // factor standard deviations

vector[P] mu[N]; // predicted values

matrix[D,D] Ly;// cholesky of the corr between factors

for (m in 1:D) {

M[m] = 0;

Sd_d[m] = 1;}

Ly = diag_pre_multiply(Sd_d, L_corr_d);

for(i in 1:N){

mu[i,1] = b[1] + lam[1]*FS[i,1];

mu[i,2] = b[2] + lam[2]*FS[i,1];

mu[i,3] = b[3] + lam[3]*FS[i,1];

mu[i,4] = b[4] + lam[4]*FS[i,2];

mu[i,5] = b[5] + lam[5]*FS[i,2];

mu[i,6] = b[6] + lam[6]*FS[i,2];

mu[i,7] = b[7] + lam[7]*FS[i,3];

mu[i,8] = b[8] + lam[8]*FS[i,3];

mu[i,9] = b[9] + lam[9]*FS[i,3];

}

}

model{

L_corr_d ~ lkj_corr_cholesky(2);

b ~ normal(0, 100);

lam ~ normal(0.4,var_p);

var_p ~ cauchy(0,2.5);

for(i in 1:N){

X[i] ~ normal(mu[i],var_p);

FS[i] ~ multi_normal_cholesky(M, Ly);

}

}

generated quantities{

vector[N] log_lik; ///// row log likelihood

real dev; /////// deviance

real log_lik0; ///// global log likelihood

corr_matrix[D] Rho; /// correlation matrix

Rho = multiply_lower_tri_self_transpose(L_corr_d);

for(i in 1:N){

for(j in 1:P){

log_lik[i] = normal_lpdf(X[i] | mu[i],var_p);

}}

log_lik0 = sum(log_lik); // global log-likelihood

dev = -2*log_lik0; // model deviance

}

"