Could someone help me with this RStan code? It runs extremely slowly, and when I run it in R it reports:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.090726 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 907.26 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
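If I am reading the gradient timing correctly, these numbers already imply a very long run even in the best case: 0.090726 s per gradient evaluation × 10 leapfrog steps × 10,000 iterations ≈ 9,073 s ≈ 2.5 hours per chain, and NUTS can take far more than 10 leapfrog steps per iteration.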
The Stan code is:
data {
  int<lower=1> I;                              // Total number of observations
  int<lower=1> Q;                              // Total number of covariates
  int<lower=1> J;                              // Number of repeated measurements in the time dimension
  int<lower=1> S;                              // Dimension of alpha
  int<lower=1> L;                              // Length of each beta
  int<lower=1> K;                              // Total number of classes
  int<lower=1, upper=K> g[I];                  // Class of each observation
  real y[I, Q, J];                             // Observed data
  real Z[I, Q, J, S];                          // Covariates for alpha
  real B[I, Q, J, L];                          // Covariates for beta
  int<lower=0, upper=1> data_index[I, Q, J];   // Indicator for non-missing data
}
parameters {
  vector[S] alpha[Q];
  vector[L] beta[K, Q];
  cholesky_factor_corr[Q] L_Sigma_Omega;
  vector<lower=0>[Q] L_sigma;
  vector[Q] omega[I];
  vector<lower=0>[Q] sigma2;
}
model {
  for (q in 1:Q) {
    alpha[q] ~ multi_normal(rep_vector(0, S), diag_matrix(rep_vector(100, S)));  // Prior for alpha
    sigma2[q] ~ inv_gamma(1, 1);                                                 // Prior for sigma2
  }
  for (k in 1:K) {
    for (q in 1:Q) {
      beta[k, q] ~ multi_normal(rep_vector(0, L), diag_matrix(rep_vector(100, L)));  // Prior for beta
    }
  }
  L_Sigma_Omega ~ lkj_corr_cholesky(1);
  L_sigma ~ cauchy(0, 2.5);
  for (i in 1:I) {
    matrix[Q, Q] L_Sigma;
    L_Sigma = diag_pre_multiply(L_sigma, L_Sigma_Omega);
    omega[i] ~ multi_normal_cholesky(rep_vector(0, Q), L_Sigma);  // Prior for omega
  }
  // Likelihood, vectorized over j
  for (i in 1:I) {
    for (q in 1:Q) {
      vector[J] mu;
      vector[S] alpha_q = alpha[q];
      int g_i = g[i];
      vector[L] beta_giq = beta[g_i, q];
      vector[L] beta_Kq = beta[K, q];
      for (j in 1:J) {
        if (data_index[i, q, j]) {
          vector[S] Z_iq = to_vector(Z[i, q, j, :]);
          vector[L] B_iq = to_vector(B[i, q, j, :]);
          if (g_i == K) {
            mu[j] = dot_product(Z_iq, alpha_q) + dot_product(B_iq, beta_Kq) + omega[i, q];
          } else {
            mu[j] = dot_product(Z_iq, alpha_q) + dot_product(B_iq, beta_giq) + dot_product(B_iq, beta_Kq) + omega[i, q];
          }
        }
      }
      // Note: all J entries of mu are used below, so this assumes data_index[i, q, ] is all 1
      // (as in the simulation); otherwise mu[j] is left undefined for missing entries.
      y[i, q] ~ normal(mu, sqrt(sigma2[q]));
    }
  }
}
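For scale: with the simulated sizes below (I = 1000, Q = 10, J = 10), every gradient evaluation has to fill I × Q × J = 100,000 entries of mu element by element, evaluate I × Q = 10,000 vectorized normal statements, and run I = 1,000 multi_normal_cholesky statements for omega (with diag_pre_multiply recomputed inside that loop each time).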
As for the simulation, I wrote it in R:
library(MASS)
library(Matrix)
library(rstan)
library(splines)
library(mvtnorm)
rstan_options(auto_write = TRUE)
# Set dimensions
I <- 1000
Q <- 10
J <- 10
S <- 3
L <- 3
K <- 5
t_min <- 1
t_max <- 5
# Create storage for data
g <- integer(I)
y <- array(NA, c(I, Q, J))
Z <- array(NA, c(I, Q, J, S))
B <- array(NA, c(I, Q, J, L))
data_index <- array(1, c(I, Q, J))
# Generate data
set.seed(1234) # Set seed for reproducibility
# Generate the matrix B(I,Q,J,L)
t <- matrix(nrow = I, ncol = Q)  # first generate random t
for (i in 1:I) {
means <- runif(Q, min = t_min, max = t_max)  # uniformly distributed means between t_min and t_max
sigmas <- runif(Q, min = 1, max = 2)  # uniformly distributed variances between 1 and 2 (used directly on the covariance diagonal)
covariance <- diag(sigmas)  # assume independence, so the covariance matrix is diagonal
t[i, ] <- mvtnorm::rmvnorm(1, mean = means, sigma = covariance)  # draw from a multivariate normal
}
spline_basis <- ns(seq(t_min, t_max, by = 0.1), df = L)
# Apply the natural spline basis function
for (i in 1:I) {
for (q in 1:Q) {
data_index_iq <- which(data_index[i, q, ] == 1)  # index of observed data for the i-th subject at the q-th response
B[i, q, data_index_iq, ] <- predict(spline_basis, t[i, data_index_iq])
}
}
# Normalize the spline basis matrix
for (l in 1:L) {
B_hat <- B[, , , l][!is.na(B[, , , l])]
B[, , , l] <- (B[, , , l] - mean(B_hat)) / sd(B_hat)
}
for (i in 1:I) {
g[i] <- ((i - 1) %% K) + 1
for (q in 1:Q) {
# Generate Z (B was already filled in above)
Z[i, q, , ] <- MASS::mvrnorm(J, rep(0, S), diag(S))
}
}
# Generate alpha, beta_0, beta_k, and Omega
alpha <- mvrnorm(Q, rep(1, S), diag(S))
beta_0 <- mvrnorm(1, rep(0, L), diag(L))
beta_k <- mvrnorm(K - 1, rep(0, L), diag(L))
Omega <- matrix(0, I, Q)
epsilon <- matrix(0, I, Q)
# Generate Omega and epsilon
for (i in 1:I) {
# Generate Omega[i, ] from a random covariance matrix
p <- qr.Q(qr(matrix(rnorm(Q^2), Q)))
Sigma <- crossprod(p, p * (Q:1) * 0.2)
Omega[i, ] <- mvrnorm(1, rep(0, Q), Sigma)
}
for (q in 1:Q) {
# Generate sigma^2 from an inverse gamma distribution
sigma_sq <- 1 / rgamma(1, shape = 1, scale = 1)
# Generate epsilon[, q] from a normal distribution with mean 0 and variance sigma^2
epsilon[, q] <- rnorm(I, mean = 0, sd = sqrt(sigma_sq))
}
# Generate Y
Y <- array(0, c(I, Q, J))
for (i in 1:I) {
for (q in 1:Q) {
# calculate B_iq and Z_iq
B_iq <- B[i, q, , ]
Z_iq <- Z[i, q, , ]
if (g[i] == K) {
Y_iq <- Z_iq %*% alpha[q,] + B_iq %*% beta_0 +
rep(Omega[i,q], J) +
rep(epsilon[i,q], J)
} else {
Y_iq <- Z_iq %*% alpha[q,] + B_iq %*% beta_0 +
B_iq %*% beta_k[g[i], ] +
rep(Omega[i,q], J) +
rep(epsilon[i,q], J)
}
# Store Y_iq in Y
Y[i, q, ] <- Y_iq
}
}
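A quick sanity check before building the Stan data list (any NA left in these arrays would make Stan fail when reading the data; this uses only base R):
# make sure nothing is left NA before handing the arrays to Stan
stopifnot(!anyNA(Y), !anyNA(Z), !anyNA(B))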
# Generate data_index_nonzero
data_index_nonzero <- array(0, c(I, Q, J))
for (i in 1:I) {
for (q in 1:Q) {
data_index_nonzero[i, q, ] <- which(data_index[i, q, ] == 1)
}
}
nz <- matrix(NA, I, Q)
for (i in 1:I) {
for (q in 1:Q) {
nz[i, q] <- sum(data_index[i, q, ])
}
}
# List the data to be passed to Stan (data_index_nonzero and nz are not declared in the
# Stan data block above, so they are unused)
stan_data <- list(I = I, Q = Q, J = J, S = S, L = L, K = K, g = g,
                  y = Y,  # pass the simulated Y; the lowercase y array was never filled in
                  Z = Z, B = B,
                  data_index = data_index, data_index_nonzero = data_index_nonzero,
                  nz = nz)
fit <- stan(file = "D:/Research/May/trajectory/stancode/try1/V3.stan",
            data = stan_data, iter = 10000, warmup = 1000, chains = 4)
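To gauge the speed without waiting for the full run, a much shorter single-chain run can be timed first (same data, just fewer iterations; get_elapsed_time() is from rstan):
# a short single-chain run just to gauge speed before the full 10000-iteration run
fit_test <- stan(file = "D:/Research/May/trajectory/stancode/try1/V3.stan",
                 data = stan_data, iter = 200, warmup = 100, chains = 1)
print(get_elapsed_time(fit_test))  # warmup and sampling time in seconds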
And my model is defined as:
$$y_{iqj} = Z_{iqj}^\top \alpha_q + B_{iqj}^\top (\beta_{g_i q} + \beta_{Kq}) + \omega_{iq} + \epsilon_{iqj}, \qquad \epsilon_{iqj} \sim N(0, \sigma_q^2),$$
where the $\beta_{g_i q}$ term is dropped for the reference class $g_i = K$, and
$$\omega_i = (\omega_{i1}, \dots, \omega_{iQ})^\top \sim \mathrm{MVN}(0, \Sigma_\omega).$$
The prior is defined as:
$$\alpha_q \sim \mathrm{MVN}(0, 100\, I_S), \quad \beta_{kq} \sim \mathrm{MVN}(0, 100\, I_L), \quad \sigma_q^2 \sim \mathrm{Inv\text{-}Gamma}(1, 1),$$
with $\Sigma_\omega = \mathrm{diag}(\lambda)\, \Omega\, \mathrm{diag}(\lambda)$, where $\Omega \sim \mathrm{LKJ}(1)$ (Cholesky parameterization) and $\lambda_q \sim \mathrm{Cauchy}^+(0, 2.5)$.
I would really appreciate it if someone could tell me why it runs so slowly; it takes almost 30 minutes per iteration! I will reply immediately!