Hi,
I have benefited greatly from reading the posts on this forum and online stan code. One issue I encountered recently is that, while trying to replicate a confirmatory factor analysis stan example posted by Mike Lawrence, I found my code sampling an extremely small region. Mike’s code allows one factor to load on each outcome with a vector of factor loading index, but I attempted to build a “design matrix” that dictates the loadings among all factors and outcomes. I think that is the only difference between my version and the original one, but mine won’t converge at all, as seen in the two picture below, where the chains were barely moving.
I have attached my code below. Any suggestions would be welcomed! The design matrix idea kind of worked in another version of this model (using modeling implied covariance structure), so it indeed made me wonder.
data {
int<lower=1> N; // sample size
int<lower=1> P; // number of items
matrix[N,P] Y; // data matrix of order [N,P]
int<lower=1> D; // number of latent dimensions
matrix[P,D] Ldes; // design matrix for factor loadings
int<lower=1> M; // number of non-zero factor loadings
}
parameters {
vector<lower=0>[D] Lp; // first factor loading constrained positive
vector[M-D] Lr; // rest of non-zero factor loading
vector[P] nu; // item intercepts
vector<lower=0>[P] psi; // vector of residual variances
cholesky_factor_corr[D] fcorr; // factor correlation cholesky factor
matrix[D,N] fac_dist_helper; // helper for non-centered multivariate sampling
}
transformed parameters{
matrix[N,D] fac_scor; //person factor scores
matrix[P,D] L; // constrained factor loading matrix
for (i in 1:D){
for (j in 1:P){
L[j,i] <- 0;
}
}
// assign positive-constrained loading to first of each column
for (i in 1:D){
for (j in 1:P){
if (Ldes[j,i]==1){
L[j,i] <- Lp[i];
break;
}
}
}
// assign other factor loading
{
int temp;
temp <- 0;
for (i in 1:D){
for (j in 1:P){
if (Ldes[j,i]==1 && L[j,i]==0){
temp <- temp + 1;
L[j,i] <- Lr[temp];
}
}
}
}
// non-centered factor score multi-norm dist.
fac_scor <- transpose(fcorr * fac_dist_helper);
}
model {
matrix[D,P] Ltran = transpose(L);
// the priors
Lp ~ student_t(5,0.5,.5);
Lr ~ student_t(5,0,1);
fcorr ~ lkj_corr_cholesky(1);
psi ~ weibull(2,1);
to_vector(fac_dist_helper) ~ std_normal();
nu ~ normal(0,1);
//The likelihood
for(j in 1:P){
Y[,P] ~ normal( nu[j] + fac_scor * Ltran[,j] , psi[j] );
}
}
R code for the data.
D <-3
P <- 10
N <-300
mu_theta <-rep(0,D) # the mean of eta
mu_epsilon<-rep(0,P) # the mean of epsilon
Phi<-diag(rep(1,D))
Psi <- diag(c(0.2079, 0.19, 0.1525, 0.20, 0.36, 0.1875, 0.1875, 1.00, 0.27, 0.27))
l1 <- c(0.99, 0.00, 0.25, 0.00, 0.80, 0.00, 0.50, 0.00, 0.00, 0.00)
l2 <- c(0.00, 0.90, 0.25, 0.40, 0.00, 0.50, 0.00, 0.00, -0.30, -0.30)
l3<- c(0.00, 0.00, 0.85, 0.80, 0.00, 0.75, 0.75, 0.00, 0.80, 0.80)
L <-cbind(l1,l2,l3) # the loading matrix
Theta <-mvrnorm(N, mu_theta, Phi) # sample factor scores
Epsilon <-mvrnorm(N, mu_epsilon, Psi) # sample error vector
Y<-Theta%*%t(L)+Epsilon# generate observable data
Nchains <- 4
Niter <- 1500
Ldes <- matrix(c(1,0,1,0,1,0,1,0,0,0,
0,1,1,1,0,1,0,0,1,1,
0,0,1,1,0,1,1,0,1,1), nrow = P, ncol = D)
M=sum(Ldes)
fa.data <-list(P=P,N=N,Y=Y,D=D,Ldes=Ldes,M=M)
# a function to generate intial values that are slightly jittered for each chain.
init_fun2 = function() {
init.values<-list(sigma_phi=rep(0.5,D)+runif(D,0,.1),
psi=rep(.2,P)+runif(P,0,.1),
Lp = rep(0.5,D) + runif(D,0,.1),
Lr = rep(0.1, sum(Ldes)-D),
nu = rep(0.1, P),
mu_theta_free = rep(0,D) + runif(D,0,.1))
return(init.values);
}
#compile the model
library("parallel")
fa.model2<- stan(file = "fa - alt.stan",
data = fa.data,
chains = 4, iter = Niter, init = init_fun2, seed = 42,
pars=c("L","psi","nu"))
print(fa.model2,probs = c(0.025, 0.5, 0.975))