I am working on a 1-dimensional dynamic common factor model. The simpler version of this model (Stan code posted below) is essentially a simple common factor model with a random walk on the latent variable eta. This model runs fine, reasonably quick and the results make sense.
The more complex version (Stan code posted below) additionally introduces time-varying intercepts and slopes (factor loadings). The factor loadings are also allowed to be correlated over time. With this model, the chains hardly move and then quickly appear stuck. After some warmup iterations the chains reach similar lp__ values, but this can take some time. There are also plenty of divergent transitions, but these disappear as warmup progresses. However, chains remain stuck.
Currently, I run this with J=450, T=32, I=15.
Is this model simply overparameterized/not empirically identified or is there something else that is off or something more that can be done in terms of reparameterization?
SIMPLER MODEL
// 1D Dynamic Common Factor Model
data {
int J; // Number of politicians
int T; // Number of time points
int I; // Number of indicators
array[J, T, I] real y; // Observed indicators (i) on subjects (j) over time (t)
}
parameters {
vector[I] beta; // Indicator-specific intercepts
vector<lower=0.0>[I-1] Lambda_raw; // Unconstrained factor loadings, positive to address sign indeterminacy
array[J] vector[T] eta_raw; // Unconstrained factor scores
vector<lower=0>[I] Theta; // Diagonal covariance matrix (independent errors)
}
transformed parameters {
vector<lower=0>[I] Lambda; // Factor loadings
matrix[J,T] eta_t; // Factor scores
Lambda[1] = 1; // Constrained to address scale indeterminacy
for (i in 2:I) {
Lambda[i] = Lambda_raw[i-1];
}
// Non-centered parameterization for eta_t
for (j in 1:J) {
eta_t[j, 1] = eta_raw[j][1]; // Initial time point receives a standard normal prior
for (t in 2:T) {
eta_t[j, t] = eta_t[j, t - 1] + eta_raw[j][t] * 1; // Implies a random walk: eta[j, t] ~ normal(eta[j, t - 1], 1);
}
}
}
model {
// priors
beta ~ std_normal();
Lambda_raw ~ lognormal(1,1);
for (j in 1:J) {
eta_raw[j] ~ std_normal(); // fixed variance to address scale inditermancy and numeric stability
}
Theta ~ std_normal();
// likelihood
for (j in 1:J) {
for (t in 1:T) {
for (i in 1:I) {
y[j, t, i] ~ normal(beta[i] + Lambda[i] * eta_t[j, t], Theta[i]);
}
}
}
}
MORE COMPLEX MODEL
// 1D Dynamic Factor Model with Time-Varying and Correlated Factor Loadings
// Factor loadings at I=1 fixed
data {
int J; // Number of politicians
int T; // Number of time points
int I; // Number of indicators
array[J, T, I] real y; // Observed indicators (i) on subjects (j) over time (t)
}
parameters {
matrix<lower=0.0>[I, T] Theta; // Diagonal covariance matrix (independent errors)
matrix[I, T] beta; // Indicator- and time-specific intercepts
matrix<lower=0.0>[T, I-1] Lambda_raw; // Unconstrained factor loadings, positive to address sign indeterminacy
vector<lower=0>[T] Lambda_sigma; // Scale vector for the prior on factor loadings
cholesky_factor_corr[T] L_Omega_lambda; // Cholesky factor of the correlation matrix for factor loadings
array[J] vector[T] eta_raw; // Unconstrained factor scores
}
transformed parameters {
matrix[J,T] eta_t; // Factor scores
matrix[I-1,T] Lambda_free; // Factor loadings
matrix[I,T] Lambda;
// Non-centered parameterization for Lambda
Lambda_free = (diag_pre_multiply(Lambda_sigma, L_Omega_lambda) * Lambda_raw)'; // implies Lambda ~ lognormal(1,Sigma)
Lambda = exp(append_row(rep_row_vector(0, T), Lambda_free)); // I=1 constrained to address scale indeterminacy
// Non-centered parameterization for eta_t
for (j in 1:J) {
eta_t[j, 1] = eta_raw[j][1]; // Initial time point receives a standard normal prior
for (t in 2:T) {
eta_t[j, t] = eta_t[j, t - 1] + eta_raw[j][t] * 1; // Implies a random walk: eta[j, t] ~ normal(eta[j, t - 1], 1);
}
}
}
model {
// priors
for(i in 1:I) {
Theta[i] ~ std_normal();
}
for(i in 1:I) {
beta[i] ~ std_normal();
}
for(i in 1:I-1) {
Lambda_raw[,i] ~ std_normal(); // normal(0, 0.5)
}
Lambda_sigma ~ student_t(3, 0, 1); // normal(0, 0.5)
L_Omega_lambda ~ lkj_corr_cholesky(2); //5-10
for (j in 1:J) {
eta_raw[j] ~ std_normal(); // fixed variance to address scale inditermancy and numeric stability
}
// likelihood
for (j in 1:J) {
for (t in 1:T) {
for (i in 1:I) {
y[j, t, i] ~ normal(beta[i, t] + Lambda[i, t] * eta_t[j, t], Theta[i, t]);
}
}
}
}