Hi!
Unfortunatly, the full model is really complicated to read because I tried to optimize it as much as I can. I remember it ran without this error, though.
I do not think it happens during initialization, because I see “iter 1/2000” appears and the first draws to be rejected before the error appears.
//
// This Stan program defines a latent factor model with
// environmental and CWM covariates,
// hierarchical trait effect on species responses and
// an error model on predictors
//
functions {
// Function to fill loading matrices
matrix fill_loadings(int D, int S, vector L_lower, vector L_diag){
matrix[S,D] L;
int idx2; //Index for the lower diagonal loadings
idx2 = 0;
for(i in 1:(D-1)) { for(j in (i+1):(D)){ L[i,j] = 0; } } //0 on upper diagonal
for(i in 1:D) L[i,i] = L_diag[i]; //Positive values on diagonal
for(j in 1:D) {
for(i in (j+1):S) {
idx2 = idx2+1;
L[i,j] = L_lower[idx2]; //Insert lower diagonal values in loadings matrix
}
}
return(L);
}
}
data {
// Flags
int<lower=0,upper=1> prior_only; //number of data points
// Integers
int<lower=0> N; //number of unique clods
int<lower=0> P; //number of unique plots
int<lower=0> K; //number of covariates (including intercept)
int<lower=0> TV; //number of traits for Vascular Plants (including intercept)
// int<lower=0> TM; //number of traits for Vascular Plants (including intercept)
int<lower=0> NV; //number of vascular plants data points
int<lower=0> NV_nz; //number of non-zero vascular plants data points
// int<lower=0> NM; //number of moss data points
int<lower=0> SV; //number of vascular species
// int<lower=0> SM; //number of moss species
int<lower=0> DV; //number of factors for vascular species
int<lower=0> DM; //number of factors for moss species
// Abundances
int VP_bin[NV]; //presence/absence of vascular plants
int VP_bin_rev[NV]; //presence/absence of vascular plants
int VP_nz[NV_nz]; //presence/absence of vascular plants
// int Moss_bin[NM]; //presence/absence of mosses
// int Moss_bin_rev[NM]; //presence/absence of mosses
// Species identities
int sp_idx_VP_bin[NV];
int sp_idx_VP_nz[NV_nz];
// int sp_idx_Moss_bin[NM];
// Clods identities
int clod_idx_VP_bin[NV];
int clod_idx_VP_nz[NV_nz];
// int clod_idx_Moss_bin[NM];
// Plots identities
int plot_idx_VP_bin[NV];
// int plot_idx_Moss_bin[NM];
// Covariates
matrix[N,K] X;
// Traits
matrix[SV,TV] Tr_VP;
matrix[SV,SV] A_phylo;
// Prior parameters
real lam_sigma_FS; //sd of factor scores
real lam_sigma_b_VP; //sd of species-specific slopes
real eta_rho; //correlation between scores
real sigma_g0;
real a_phylo; real b_phylo;
real mean_a_bin; real sd_a_bin; //common intercepts
}
transformed data {
int<lower=1> MV = DV*(SV-DV)+ DV*(DV-1)/2; // number of lower-triangular, non-zero loadings
}
parameters {
// Hyperparameters
vector<lower=0>[DV] sigma_FS_VP; //sd of vascular plants factor scores
vector<lower=0>[K+1] sigma_b_VP; //sd of vascular plants factor scores
cholesky_factor_corr[DV] chol_Rho_FS_VP_Unc; //correlation between scores
cholesky_factor_corr[K+1] chol_Rho_b_VP; //correlation between scores
matrix[TV,K+1] g_VP_raw; //traits slopes of species-response to environment
real<lower=0,upper=1> rho_phylo;
// Parameters
/// Raw factor scores
matrix[N,DV] FS_VP_raw; //Un-centered factor scores of vascular plants
/// Loadings
//// Vascular plants
vector[MV] L_lower_VP_Unc; //Lower diagonal loadings of vascular plants
vector[DV] L_diag_VP_Unc; //Diagonal loadings of vascular plants
/// Raw species-specific slopes
matrix[SV,K+1] b_theta_VP_raw;
}
transformed parameters {
matrix[TV,K+1] g_VP = g_VP_raw; //traits slopes of species-response to environment
/// Raw species-specific slopes
matrix[SV,K+1] b_theta_VP;
// Final species-specific slopes
matrix[SV,K] b_VP;
/// Species specific zero-inflation parameter
vector[SV] logit_theta;
// Unconstrained factor scores
matrix[N,DV] FS_VP_Unc = FS_VP_raw * diag_post_multiply(chol_Rho_FS_VP_Unc, sigma_FS_VP);
// Unconstrained loading matrices
matrix[SV,DV] L_VP_Unc = fill_loadings(DV, SV, L_lower_VP_Unc, L_diag_VP_Unc);
// Final covariances among species parameters
cholesky_factor_cov[K] chol_V = diag_post_multiply(chol_Rho_b_VP, sigma_b_VP);
cholesky_factor_cov[SV] chol_Phylo = cholesky_decompose(
rho_phylo * A_phylo + diag_matrix(rep_vector(1-rho_phylo,SV))
);
// Linear predictors
matrix[N,SV] log_mu_VP_Unc;
vector[NV] log_mu_VP_long_Unc;
vector[NV_nz] log_mu_VP_long_nz_Unc;
// Change the prior for the intercept of the abundances and zero inflation hyperparameters
g_VP[1,1] = 5 + g_VP_raw[1,1];
g_VP[1,K+1] = 2 + g_VP_raw[1,K+1];
// Compute final species-specific slopes
b_theta_VP = Tr_VP * g_VP + chol_Phylo * b_theta_VP_raw * chol_V;
b_VP = block(b_theta_VP, 1, 1, SV, K);
logit_theta = b_theta_VP[,K+1];
// Add regression of environmental predictors
for(s in 1:SV) log_mu_VP_Unc[,s] = X * b_VP[s,]';
// Add latent variable contributions
/// Vascular plants
log_mu_VP_Unc += FS_VP_Unc * L_VP_Unc';
// Transform everyhting into long format
for(n in 1:NV) log_mu_VP_long_Unc[n] = log_mu_VP_Unc[clod_idx_VP_bin[n],sp_idx_VP_bin[n]];
for(n in 1:NV_nz) log_mu_VP_long_nz_Unc[n] = log_mu_VP_Unc[clod_idx_VP_nz[n],sp_idx_VP_nz[n]];
}
model {
// Hyperparameters
target += exponential_lpdf(sigma_FS_VP | lam_sigma_FS); //sd of factor scores
target += exponential_lpdf(sigma_b_VP| lam_sigma_b_VP); //sd of vascular plants factor scores
target += lkj_corr_cholesky_lpdf(chol_Rho_FS_VP_Unc | eta_rho); //correlation between scores
target += lkj_corr_cholesky_lpdf(chol_Rho_b_VP | eta_rho); //correlation between scores
target += std_normal_lpdf(to_vector(g_VP_raw));
target += beta_lpdf(rho_phylo | a_phylo, b_phylo);
// Parameters
/// Raw factor scores
target += std_normal_lpdf(to_vector(FS_VP_raw)); //Un-centered factor scores of vascular plants
/// Loadings
//// Vascular plants
target += std_normal_lpdf(L_lower_VP_Unc); //Lower diagonal loadings of vascular plants
target += std_normal_lpdf(L_diag_VP_Unc); //Diagonal loadings of vascular plants
/// Raw species-specific slopes
target += std_normal_lpdf(to_vector(b_theta_VP_raw));
// likelihood including all constants
if (!prior_only) {
real log_of_theta[SV];
real log1m_of_theta[SV];
for(s in 1:SV){
log_of_theta[s] = bernoulli_logit_lpmf(1 | logit_theta[s]);
log1m_of_theta[s] = bernoulli_logit_lpmf(0 | logit_theta[s]);
}
// Vascular plants
for(n in 1:NV){
if (VP_bin[n] == 0){
target += log_sum_exp(log_of_theta[sp_idx_VP_bin[n]],
log1m_of_theta[sp_idx_VP_bin[n]] + poisson_log_lpmf(VP_bin[n] | log_mu_VP_long_Unc[n]) );
} else
target += log1m_of_theta[sp_idx_VP_bin[n]];
}
target += poisson_log_lpmf(VP_nz | log_mu_VP_long_nz_Unc);
}
}
EDIT : is there something I can try to solve this? Or is that a bug? I would be surprise the cholesky factorization of the 16*16 matrix would fill either my ram or disk space, but I do not know how the serializer works :)
Should I try to create intermediate objects to realize the different operations happening in these lines (diag matrix, matrix multiplication, cholesky factorization)?
Tomorrow, I will try to reproduce the error with a simplistic model.
Thank you very much!
Lucas