I wrote a two-component mixture model for estimating the true effect sizes of GWAS summary statistics for phenotypes with genetic correlation (Stan code pasted at the end). The model works fine on a small dataset (N \approx 200, M = 2), taking around 20 minutes to run 100 warmup + 1000 sampling iterations for 4 chains in parallel. However, when I tried a larger dataset (N \approx 3 \times 10^5, M = 2), it has taken around 5 days to draw 500 samples and still hasn't converged (most Rhat values are around 1.1 and N_eff around 10; sampler parameters from stansummary are also pasted below).
name,Mean,MCSE,StdDev,5%,50%,95%,N_Eff,N_Eff/s,R_hat
"lp__",-4.01751e+07,1.96137e+07,9.6404e+07,-2.89641e+08,-1.43121e+06,756479,24.1586,inf,1.01487
"accept_stat__",0.999343,0.000218408,0.00346515,0.998102,0.999955,1,251.712,inf,0.99805
"stepsize__",3.48344e-05,2.06904e-06,4.04532e-06,3.21151e-05,3.2273e-05,4.23705e-05,3.82268,inf,1.57626
"treedepth__",9.52953,0.231384,1.07193,7,10,10,21.4616,inf,1.01066
"n_leapfrog__",874.465,71.2052,320.677,127,1023,1023,20.2821,inf,1.01249
"divergent__",0,0,0,0,0,0,508,inf,-nan
"energy__",4.32656e+07,2.07136e+07,1.0122e+08,-417360,1.91271e+06,3.02495e+08,23.8792,inf,1.01533
Another thing that doesn't work for the larger dataset is the generated quantity gamma, the posterior probability of each observation belonging to each mixture component: most of its values are -nan. I would appreciate any suggestions for debugging the model and speeding it up!
data {
  int<lower=0> N;           // number of loci
  int<lower=1> M;           // number of phenotypes
  matrix[N, M] B;           // observed effect sizes (one row per locus)
  // Standard errors scale the Cholesky factor of the error covariance in the
  // model block, so they must be positive; reject bad input at data load time.
  matrix<lower=0>[N, M] SE;
  int<lower=1> K;           // number of mixture components (K = 2 for this model)
}
transformed data{
  // Constants built once, before sampling starts.
  vector[M] zeros = rep_vector(0, M);   // mean of the effect-size prior
  vector[K] ones = rep_vector(1, K);    // flat Dirichlet prior on pi
  vector[M] SE_vec[N];                  // row n of SE as a column vector
  for (n in 1:N)
    SE_vec[n] = SE[n]';                 // transpose row_vector -> vector
}
parameters {
  simplex[K] pi; // mixing proportions between the null and non-null components
  cholesky_factor_corr[M] L_e; // Cholesky factor of the correlation matrix of measurement errors
  cholesky_factor_corr[M] L_b; // Cholesky factor of the correlation matrix of the null (bias) component
  cholesky_factor_corr[M] L_g; // Cholesky factor of the correlation matrix of the non-null (effect) component
  vector[M] beta_vec[N]; // latent true effect size of each locus (one M-vector per locus)
  vector<lower=0>[M] tau_0; // per-phenotype scale of the null component
  vector<lower=0>[M] tau_1; // per-phenotype scale of the non-null component
}
transformed parameters{
  // NOTE: despite the name, these are Cholesky factors of the component
  // covariances (diag(tau) * L), not the covariance matrices themselves.
  // They are consumed as-is by multi_normal_cholesky_lpdf in the model and
  // generated quantities blocks.
  matrix[M, M] Sigmas[K];
  Sigmas[1] = diag_pre_multiply(tau_0, L_b); // null component
  Sigmas[2] = diag_pre_multiply(tau_1, L_g); // non-null component
}
model {
  vector[K] lps;    // per-component log weight for one locus
  vector[K] log_pi; // log mixing proportions, computed once (not N*K times)
  log_pi = log(pi);

  // Priors.
  tau_0 ~ double_exponential(0, 1e-3); // null component: scales shrunk hard toward zero
  tau_1 ~ cauchy(0, 2.5);              // non-null component: weakly informative heavy tail
  L_e ~ lkj_corr_cholesky(2.0);
  L_b ~ lkj_corr_cholesky(2.0);
  L_g ~ lkj_corr_cholesky(2.0);
  pi ~ dirichlet(ones);

  for (n in 1:N) {
    // Measurement model B[n] | beta_vec[n]: this term is identical for every
    // mixture component (it does not involve k), so add it once here instead
    // of recomputing it K times inside the mixture below.
    target += multi_normal_cholesky_lpdf(B[n] | beta_vec[n],
                                         diag_pre_multiply(SE_vec[n], L_e));
    // Mixture prior on the latent true effect sizes, marginalized on the
    // log scale with log_sum_exp for numerical stability.
    for (k in 1:K)
      lps[k] = log_pi[k] + multi_normal_cholesky_lpdf(beta_vec[n] | zeros, Sigmas[k]);
    target += log_sum_exp(lps);
  }
}
generated quantities {
  matrix[M,M] cor_e; // correlation matrix of measurement errors
  matrix[M,M] cor_b; // correlation matrix of the null component
  matrix[M,M] cor_g; // correlation matrix of the non-null component
  matrix[N,K] gamma; // posterior probability of each locus belonging to each component
  cor_e = multiply_lower_tri_self_transpose(L_e);
  cor_b = multiply_lower_tri_self_transpose(L_b);
  cor_g = multiply_lower_tri_self_transpose(L_g);
  for (n in 1:N) {
    vector[K] log_gamma;
    // Work entirely on the log scale: the original pi[k] * exp(lpdf) under- or
    // overflows when |lpdf| is large, producing 0/0 or inf/inf = -nan in the
    // normalization. softmax(log_gamma) is the numerically stable equivalent.
    // The measurement-model term for B[n] does not depend on k, so it cancels
    // in the normalization and is omitted here.
    for (k in 1:K)
      log_gamma[k] = log(pi[k]) + multi_normal_cholesky_lpdf(beta_vec[n] | zeros, Sigmas[k]);
    gamma[n] = softmax(log_gamma)';
  }
}