Hello All
I am new to Stan and so my apologies if I missed something while posting the problem, where any help will be much appreciated. I am getting the error below, while trying to run STAN
Based on a similar issue posted in a different thread from a few years ago, it mentioned that having a lower bound will help solve the issue. However, a lower bound is included in the code below.
Copy of Error
SAMPLING FOR MODEL ‘bppca’ NOW (CHAIN 1).
Chain 1: Rejecting initial value:
Chain 1: Error evaluating the log probability at the initial value.
Chain 1: Exception: multi_normal_lpdf: LDLT_Factor of covariance parameter is not positive definite. last conditional variance is -21.8925. (in ‘modele0858283940_bppca’ at line 55)
****
Code Where Stan is Being Called
****
Nsamples = 1000
Nchains = 3
model_file = "stan_code/bppca.stan"
smod = stan_model(model_file)
D_max = 5
sa.list = list()
log_lik.list = list()
looic.list = list()
for(d in 1:D_max){
print(d)
pca_data <- list(Y = Y, N = N, P = P, D = d, Fam = Fam, Site = Site, ns = ns, nf = nf)
set.seed(314)
sa.list[[d]] = sampling(smod, data= pca_data, iter=Nsamples, chains=Nchains,init="random")
#log_lik.list[[d]] <- extract_log_lik(sa.list[[d]])
log_lik.list[[d]] <- extract(sa.list[[d]],"log_lik_marg")[[1]]
looic.list[[d]] = loo(log_lik.list[[d]])
save(sa.list,log_lik.list,looic.list,file="results/bppca_results.RData")
print("###############################")
}
Stan Model File
data{
int<lower=1> N; // number of subjects in dataset
int<lower=1> P; // dimension of data
int<lower=1> D; // dimension of latent space
int<lower=1> ns; // number of sites
int<lower=1> nf; // number of families with more than one member
int<lower=1> Site[N]; // site ids
int<lower=0> Fam[N]; // family ids
vector[P] Y[N]; // responses
}
transformed data{
vector[P] eta;
eta = rep_vector(0,P);
}
parameters{
vector[D] Theta[N];
matrix[P,D] Lambda;
matrix[D,ns] A_site;
matrix[D,nf] B_fam;
matrix[P,ns] C_site;
matrix[P,nf] D_fam;
real<lower=0> sigma2_a;
real<lower=0> sigma2_b;
real<lower=0> sigma_c;
real<lower=0> sigma_d;
real<lower=0> sigma_eps;
}
transformed parameters{
vector[D] mu[N];
vector[P] nu[N];
for(n in 1:N){
if(Fam[n] == 0) mu[n] = col(A_site,Site[n]);
if(Fam[n]>0) mu[n] = col(A_site,Site[n]) + col(B_fam,Fam[n]);
}
for(n in 1:N){
if(Fam[n] == 0) nu[n] = col(C_site,Site[n]);
if(Fam[n]>0) nu[n] = col(C_site,Site[n]) + col(D_fam,Fam[n]);
}
}
model{
sigma2_a ~ uniform(0,1);
sigma2_b ~ uniform(0,1-sigma2_a);
sigma_c ~ cauchy(0,1);
sigma_d ~ cauchy(0,1);
sigma_eps ~ cauchy(sigma_c^2+sigma_d^2,1);
to_vector(A_site) ~ normal(0,sqrt(sigma2_a));
to_vector(B_fam) ~ normal(0,sqrt(sigma2_b));
to_vector(C_site) ~ normal(0,sigma_c);
to_vector(D_fam) ~ normal(0,sigma_d);
for(d in 1:D)
Lambda[,d] ~ normal(0, 1);
for(n in 1:N){
if(Fam[n] == 0){
Theta[n] ~ multi_normal(mu[n], diag_matrix(rep_vector(1-sigma2_a,D)));
Y[n] ~ multi_normal(Lambda * Theta[n] + nu[n], diag_matrix(rep_vector(sigma_eps^2-sigma_c^2,P)));
}
if(Fam[n] > 0){
Theta[n] ~ multi_normal(mu[n], diag_matrix(rep_vector(1-sigma2_a-sigma2_b,D)));
Y[n] ~ multi_normal(Lambda * Theta[n] + nu[n], diag_matrix(rep_vector(sigma_eps^2-sigma_c^2-sigma_d^2,P)));
}
}
}
generated quantities{
//matrix[P,P] Eig_vec_lambda;
//vector[P] Eig_val_lambda;
//matrix[P,P] W;
cov_matrix[P] Q;
vector[N] log_lik_marg;
vector[N] log_lik;
//Eig_vec_lambda = eigenvectors_sym(Lambda*Lambda');
//Eig_val_lambda = eigenvalues_sym(Lambda*Lambda');
Q = Lambda * Lambda' + diag_matrix(rep_vector(sigma_eps^2,P));
//W = Lambda * Lambda';
for (n in 1:N){
log_lik_marg[n] = multi_normal_lpdf(Y[n] | eta, Q);
if(Fam[n] == 0){
log_lik[n] = multi_normal_lpdf(Y[n] | Lambda * Theta[n] + nu[n], diag_matrix(rep_vector(sigma_eps^2-sigma_c^2,P)));
}
if(Fam[n] > 0){
log_lik[n] = multi_normal_lpdf(Y[n] | Lambda * Theta[n] + nu[n], diag_matrix(rep_vector(sigma_eps^2-sigma_c^2-sigma_d^2,P)));
}
}
}
PS: This is a publicly available code and I am trying to recreate the results of a paper
EDIT: @maxbiostat edited this post for syntax highlighting.