Hi there,
After some time I got back to this problem. I implemented the sign correction in the generated quantities block. I tried first with a model not including the latent factors, only the factor loadings, similar to my model specifiaction above. The model converges and retrieves the true parameters.
Here is the stan code for that model (mar_lat_noF.stan
)
data {
int<lower=1> S; // Number of Secies
int<lower=1> T; // Numer of years
int <lower=1> D; // Number of latent dimensions
vector[S] Y[T]; // The outcome matrix
}
transformed data{
int<lower=1> M;
M = D*(S-D)+ D*(D-1)/2; // number of non-zero loadings
}
parameters {
vector[S] r; // Vector of intrinsic growth rates
vector[S] a; // matrix with competitive parameter
vector[M] L_t; // lower diagonal elements of L
vector[D] L_d; // diagonal elements of L
vector<lower=0>[S] psi; // vector of variances
}
transformed parameters{
matrix[S,D] L; //lower triangular factor loadings Matrix
cov_matrix[S] Omega; // Covariance matrix
matrix[S,S] L_Omega;
{
int idx;
idx = 0;
for(i in 1:(D-1)){
for(j in (i+1):D){
L[i,j] = 0; //constrain the upper triangular elements to zero
}
}
for(j in 1:D) {
L[j,j] = L_d[j];
for(i in (j+1):S) {
idx = idx + 1;
L[i,j] = L_t[idx];
}
}
}
Omega = L*L';
for (i in 1:rows(Omega))
Omega[i,i] = Omega[i,i] + psi[i];
L_Omega = cholesky_decompose(Omega);
}
model {
// the priors
L_d ~ normal(0,1);
L_t ~ normal(0,1);
psi ~ exponential(2);
r ~ normal(0,1);
a ~ normal(0,1);
//The likelihood
for (t in 2:T)
Y[t] ~ multi_normal_cholesky(r + diag_matrix(a)*Y[t-1], L_Omega);
}
generated quantities{
matrix[S,S] Rho; // Correlation matrix
vector[S] sde; // species' sd
matrix[S,D] L_invar = L; // constrained factor loadings
for(d in 1:D){
if(L[d,d]<0){
L_invar[,d] = -1 * L[,d];
}
}
for(i in 1:S)
for(j in 1:S)
Rho[i,j] = Omega[i,j]/sqrt(Omega[i,i] * Omega[j,j]);
sde = sqrt(diagonal(Omega));
}
Both Mauricio’s code and @ldeschamps’s code have explicitly included the latent factors in their models. I tried to implement a stan model including them (mar_lat.stan
), but assuming no correlation between latent factors:
data {
int<lower=1> S; // Number of Secies
int<lower=1> T; // Numer of years
int <lower=1> D; // Number of latent dimensions
vector[S] Y[T]; // The outcome matrix
}
transformed data{
int<lower=1> M;
M = D*(S-D)+ D*(D-1)/2; // number of non-zero loadings
}
parameters {
vector[S] r; // Vector of intrinsic growth rates
vector[S] a; // matrix with competitive parameter
vector[M] L_t; // lower diagonal elements of L
vector[D] L_d; // diagonal elements of L
vector<lower=0>[S] psi; // vector of variances
matrix[D,T-1] F;
}
transformed parameters{
matrix[S,D] L; //lower triangular factor loadings Matrix
matrix[S,T-1] rand_eff;
vector[S] Mu[T-1];
{
int idx;
idx = 0;
for(i in 1:(D-1)){
for(j in (i+1):D){
L[i,j] = 0; //constrain the upper triangular elements to zero
}
}
for(j in 1:D) {
L[j,j] = L_d[j];
for(i in (j+1):S) {
idx = idx + 1;
L[i,j] = L_t[idx];
}
}
}
rand_eff = L*F;
for(t in 1:(T-1)){
Mu[t,] = r + diag_matrix(a)*Y[t] + rand_eff[,t];
}
}
model {
// the priors
L_d ~ normal(0,1);
L_t ~ normal(0,1);
psi ~ exponential(2);
r ~ normal(0,1);
a ~ normal(0,1);
to_vector(F) ~ normal(0,1);
//The likelihood
for (t in 2:T){
for(s in 1:S){
Y[t,s] ~ normal(Mu[t-1,s], psi[s]);
}
}
}
generated quantities{
matrix[S,S] Omega;
matrix[S, S] Rho;
vector[S] sde;
matrix[S,D] L_invar = L;
matrix[D,T-1] F_invar = F;
for(d in 1:D){
if(L[d,d]<0){
L_invar[,d] = -1 * L[,d];
F_invar [,d] = -1 * F[,d];
}
}
Omega = L_invar*L_invar'+diag_matrix(psi);
for(i in 1:S)
for(j in 1:S)
Rho[i,j] = Omega[i,j]/sqrt(Omega[i,i] * Omega[j,j]);
sde = sqrt(diagonal(Omega));
}
It didn’t go well. I had divergent transitions, low n_eff, big Rhats and low bfmi in the four chains (same fate when increasing adapt_delta
and max_treedepth
). The true parameter values for r
and a
were retrieved by the model, but the variance-covariance matrix Omega
was not.
Here are the traceplots for the sign-constrained factor loadings:
and here the summary:
mean sd 5.5% 94.5% n_eff Rhat4
L_invar[1,1] 0.09 0.03 0.04 0.13 531 1.00
L_invar[1,2] 0.00 0.00 0.00 0.00 NaN NaN
L_invar[1,3] 0.00 0.00 0.00 0.00 NaN NaN
L_invar[2,1] 0.12 0.04 0.06 0.17 325 1.02
L_invar[2,2] 0.08 0.04 0.01 0.14 152 1.02
L_invar[2,3] 0.00 0.00 0.00 0.00 NaN NaN
L_invar[3,1] -0.03 0.05 -0.10 0.05 236 1.00
L_invar[3,2] 0.05 0.07 -0.07 0.14 197 1.01
L_invar[3,3] 0.10 0.04 0.02 0.16 90 1.05
L_invar[4,1] 0.05 0.08 -0.09 0.18 120 1.03
L_invar[4,2] 0.16 0.12 -0.09 0.29 80 1.06
L_invar[4,3] -0.15 0.13 -0.28 0.15 87 1.05
L_invar[5,1] -0.16 0.03 -0.20 -0.11 576 1.01
L_invar[5,2] -0.03 0.05 -0.10 0.04 263 1.01
L_invar[5,3] 0.02 0.04 -0.04 0.08 212 1.02
L_invar[6,1] 0.16 0.04 0.09 0.21 213 1.02
L_invar[6,2] 0.04 0.07 -0.07 0.14 175 1.02
L_invar[6,3] -0.09 0.04 -0.14 -0.01 82 1.06
L_invar[7,1] 0.08 0.05 -0.01 0.16 124 1.02
L_invar[7,2] -0.10 0.08 -0.19 0.05 81 1.05
L_invar[7,3] 0.04 0.10 -0.17 0.17 137 1.04
L_invar[8,1] -0.05 0.03 -0.10 0.00 646 1.00
L_invar[8,2] -0.04 0.04 -0.09 0.03 270 1.01
L_invar[8,3] -0.04 0.03 -0.09 0.01 152 1.03
L_invar[9,1] 0.03 0.06 -0.07 0.12 166 1.00
L_invar[9,2] 0.05 0.11 -0.15 0.21 156 1.02
L_invar[9,3] 0.19 0.07 0.04 0.25 43 1.09
L_invar[10,1] -0.11 0.04 -0.16 -0.05 387 1.00
L_invar[10,2] -0.05 0.05 -0.13 0.04 222 1.01
L_invar[10,3] -0.06 0.04 -0.11 0.01 106 1.03
All of this makes me think I have not specified the model correctly.
I tried another model where I include correlation among factors (mar_lat2.stan
), with similar failure:
data {
int<lower=1> S; // Number of Secies
int<lower=1> T; // Numer of years
int <lower=1> D; // Number of latent dimensions
vector[S] Y[T]; // The outcome matrix
}
transformed data{
int<lower=1> M;
vector[D] FS_mu; // vector of latent factor means
vector<lower=0>[D] FS_sd; // vector of latent factor sd
M = D*(S-D)+ D*(D-1)/2; // number of non-zero loadings
for(m in 1:D){
FS_mu[m] = 0;
FS_sd[m] = 1;
}
}
parameters {
vector[S] r; // Vector of intrinsic growth rates
vector[S] a; // matrix with competitive parameter
vector[M] L_t; // lower diagonal elements of L
vector[D] L_d; // diagonal elements of L
vector<lower=0>[S] psi; // vector of variances
matrix[D,T-1] F;
cholesky_factor_corr[D] Rho_F;
}
transformed parameters{
matrix[S,D] L; //lower triangular factor loadings Matrix
matrix[D,D] L_F;
matrix[S,T-1] rand_eff;
vector[S] Mu[T-1];
L_F = diag_pre_multiply(FS_sd, Rho_F);
{
int idx;
idx = 0;
for(i in 1:(D-1)){
for(j in (i+1):D){
L[i,j] = 0; //constrain the upper triangular elements to zero
}
}
for(j in 1:D) {
L[j,j] = L_d[j];
for(i in (j+1):S) {
idx = idx + 1;
L[i,j] = L_t[idx];
}
}
}
rand_eff = L*F;
for(t in 1:(T-1)){
Mu[t,] = r + diag_matrix(a)*Y[t] + rand_eff[,t];
}
}
model {
// the priors
L_d ~ normal(0,1);
L_t ~ normal(0,1);
psi ~ exponential(2);
r ~ normal(0,1);
a ~ normal(0,1);
Rho_F ~ lkj_corr_cholesky(2);
//The likelihood
for (t in 2:T){
F[,t-1] ~ multi_normal_cholesky(FS_mu, L_F);
for(s in 1:S){
Y[t,s] ~ normal(Mu[t-1,s], psi[s]);
}
}
}
generated quantities{
matrix[S,S] Omega;
matrix[S, S] Rho;
vector[S] sde;
matrix[S,D] L_invar = L;
matrix[D,T-1] F_invar = F;
matrix[D,D] phi;
phi = multiply_lower_tri_self_transpose(Rho_F);
for(d in 1:D){
if(L[d,d]<0){
L_invar[,d] = -1 * L[,d];
F_invar [,d] = -1 * F[,d];
}
}
Omega = L_invar*phi*L_invar'+diag_matrix(psi);
for(i in 1:S)
for(j in 1:S)
Rho[i,j] = Omega[i,j]/sqrt(Omega[i,i] * Omega[j,j]);
sde = sqrt(diagonal(Omega));
}
Here is the R script simulating data and fitting the model
# Clean the environment
rm(list = ls())
# Load libraries
library(rstan)
library(mvtnorm)
library(rethinking)
library(truncnorm)
# simulate time series following a gompertz curve. Simulates species biomass on a log scale
Gompertz.sim <- function(init.pop, r, alpha, time, sp, Rho, sd_noise){
metacommunity <- matrix(NA, nrow = time, ncol = sp)
metacommunity[1,] <- init.pop
for (t in 2:time) {
metacommunity[t,]<- r + alpha*metacommunity[t-1,] +
t(rmvnorm2(1, Mu = rep(0, sp),
sigma = diag(sd_noise),
Rho = Rho))
}
return(metacommunity)
}
set.seed(2020)
P0<- 1 # Initial population biomass for all species on log scale
sp <- 10 # Number of species
t <- 100 # Number of timesteps
r <- rnorm(sp, 0.65, 0.1) # intrinsict grwoth rate for all species
alpha <- rtruncnorm(sp, a = -Inf, b = 0.95, mean = 0.9, sd = 0.1) # Density dependence for all sp
# Process error standard deviation for all sp
sd_noise <- matrix(0, nrow = sp, ncol = sp)
diag(sd_noise) <- rlnorm(sp, -1.5, 0.1)
Rho <- rlkjcorr(1, sp, 2) # Correlation matrix
Sigma <- sd_noise%*%Rho%*%sd_noise # variance-covariance matrix
# Simulate the community dynamics
Sim.data <- Gompertz.sim(init.pop = P0, r = r, alpha = alpha, time = t, sp = sp, sd_noise = sd_noise, Rho = Rho)
# Prepare the stan data
dat<-list(Y = Sim.data,
S = ncol(Sim.data),
T=nrow(Sim.data),
D = 3)
# Estimate the posterior
MAR_model <- stan(file = "mar_lat.stan", #file = "mar_lat2.stan" #file = "mar_lat_noF.stan"
data = dat,
chains = 4,
cores = 4,
iter = 4000,
control = list(adapt_delta = 0.8, max_treedepth = 10))
Any ideas?
Thanks for your help!
Cheers,
Alfonso