I have the following code for a hierarchical model that I coded with MCMC in R before. Surprisingly, this Stan code runs super slow (on Mac OS) comparing to my MCMC in R. Did I do anything obviously silly to slow it down? I have attached all the testing code and data here if anyone could help me out. Any comments would be appreciated.

```
functions{
// a function to calculate the square root of f ratios
real sqrt_f_ratio(real nu_j, real nu_0, real rho_0, row_vector omega1){
real alpha;
real front;
real omega2;
omega2 = omega1[1]^2 + omega1[2]^2;
alpha = 4 * nu_0 / rho_0^2;
front = (tgamma(nu_j + 1) * tgamma(nu_0)) /
(tgamma(nu_j) * tgamma(nu_0 + 1));
return (sqrt(front * (alpha / (alpha + omega2))^(nu_j - nu_0)));
}
}
data {
int<lower=1> N; // number of locations
int<lower=1> P; // number of variables
int<lower=1> L; // number of frequencies
matrix[N, 2] s; // locations N by 2
matrix[L, 2] omega; // frequencies L by 2
matrix[N, P] Y; // data N row-vectors of length P
cov_matrix[P] Sigma; // covariance matrix of amplitudes
}
parameters {
vector<lower=0>[P] Tau2; // nugget variance
vector<lower=0>[P+1] Nu; // nu's for spectral fct f_0 ... f_p
vector<lower=0>[P+1] Rho;
matrix[L, P] A; // amplitudes L row-vectors of length P
matrix[L, P] B;
}
model {
matrix[P,P] D1[L,N]; // L by N array of diagonal matrices of size P
matrix[P,P] D2[L,N];
matrix[N,P] W; // N by P matrix for the mean
row_vector[P] Lsum; // P by 1 row-vector to initiate summation over L
vector[P] sfr; // sqrt of f_ratio as a vector of length P
// priors
for(p in 1:(P+1)){
Nu[p] ~ lognormal(-2, 1); // nu_p = exp(Normal) i.e. log(nu_p) ~ Normal
Rho[p] ~ lognormal(0, 1);
}
for(p in 1:P){
Tau2[p] ~ inv_gamma(0.1, 0.1);
}
for (l in 1:L){ //single index of matrix refers to its row
A[l] ~ multi_normal(rep_vector(0,P), Sigma); // P by 1 row-vector amplitude
B[l] ~ multi_normal(rep_vector(0,P), Sigma); // P by 1 row-vector amplitude
for(j in 1:P){
sfr[j] = sqrt_f_ratio(Nu[j+1], Nu[1], Rho[j+1], omega[l]);
}
for(i in 1:N){
D1[l,i] = cos(omega[l]*s[i]') * diag_matrix(sfr);
// diagonal matrix of size P
D2[l,i] = sin(omega[l]*s[i]') * diag_matrix(sfr);
// diagonal matrix of size P
}
}
for (i in 1:N){
Lsum = rep_row_vector(0, P); // P by 1 row-vector of all 0
for(l in 1:L){ // to sum over l
W[i] = Lsum + A[l] * D1[l,i] + B[l] * D2[l,i]; // P by 1 row-vector after summing over l
}
Y[i] ~ multi_normal(W[i], diag_matrix(Tau2));
}
}
```

And, here is the R part to read everything in and run Stan.

```
## load the simulated data
library(RCurl)
url1 <- "https://raw.githubusercontent.com/wenlongG/data/main/Y.csv"
url2 <- "https://raw.githubusercontent.com/wenlongG/data/main/s.csv"
Y <- read.csv(text = getURL(url1))
s <- read.csv(text = getURL(url2))
N = nrow(Y)
P <- ncol(Y)
L <- 100
nu.vec_True <- c(1,1,4)
rho_True <- c(2,2,4)
grid <- seq(-pi, pi, length.out = 10)
omega <- expand.grid(grid, grid)
# use a simplified Sigma for now, instead of the mixture model
Sigma <- 0.2 * diag(P) + 0.8
## input data for Stan
stan_data <- list(N = N,
P = P,
L = L,
s = s,
omega = omega,
Y = Y,
Sigma = Sigma)
## initial values for Stan
init_0 <- function() {
list(Tau2 = rep(0.2, P),
Nu = nu.vec_True, # initiate with true value
Rho = rho_True,
A = matrix(rnorm(P*L),nrow = L, ncol = P),
B = matrix(rnorm(P*L),nrow = L, ncol = P)
)
}
## call stan to run HMC
library("rstan")
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
fit0 <- stan(model_code = stan0,
data = stan_data,
chains = 4,
init = init_0,
#warmup = 500,
iter = 40,
verbose = TRUE
)
```