Stan running is so low

Hello everyone!
Will no one encounter this problem? My data is a 10002010 matrix data, and What I did was a simulation study. Repeat 1000 batches of data, iterate 10,000 times, warm up 5000 times, run 3 chains, but the running is very slow. After 3 days of running, a batch of data was generated and it was still in an iteration. This way, my program will not work for one year. I do not know when it will be possible to run the results. Is there any way to speed up the iterative process?
Please provide some suggestions.

Posting the Stan program

timestart <- Sys.time()
options(digits=22)
library(rstan)
pnum<-1000
inum<-10
hnum<-50
var<-c(16,4,1,64,2,3,144)
iter=1000
inmc.vc<-array(0,dim=c(iter,length(var)))
inmc.se<-array(0,dim=c(iter,length(var)))
inmc.ci50<-array(0,dim=c(iter,length(var)))
inmc.ci950<-array(0,dim=c(iter,length(var)))
inmc.cover<-array(0,dim=c(iter,length(var)))
file_path<-"C:/Users/Administrator/Desktop/s/data/data_"
##开始循环
for (k in 1:iter){
  pz<-rnorm(pnum,0,(var[1])^0.5)
  iz<-rnorm(inum,0,(var[2])^0.5)
  hz<-rnorm(hnum,0,(var[3])^0.5)
  piz<-array(rnorm((pnum*inum),0,(var[4])^0.5),dim=c(pnum,inum))
  phz<-array(rnorm((pnum*hnum),0,(var[5])^0.5),dim=c(pnum,hnum))
  ihz<-array(rnorm((inum*hnum),0,(var[6])^0.5),dim=c(inum,hnum))
  pihz<-array(rnorm((pnum*inum*hnum),0,(var[7])^0.5),dim=c(pnum,inum,hnum))
  x<-array(0,dim=c(pnum,inum,hnum))
  for(p in 1:pnum){
    for(i in 1:inum){
      for(h in 1:hnum){
        x[p,i,h]<-0+pz[p]+iz[i]+hz[h]+piz[p,i]+phz[p,h]+ihz[i,h]+pihz[p,i,h]
      }
    }
  }
  write.csv(x,file=paste(file_path,k,".csv",sep = ""),row.names = F ) 
  gtdata<-list(np=pnum,ni=inum,nh=hnum,vc=var,x=x)
  mcmc.model<-'data{
  int<lower=1> np;
  int<lower=1> ni;
  int<lower=1> nh;
  real x[np,ni,nh];
  real vc[7];
}
parameters{
real<lower=0> sigmap;
real<lower=0> sigmai;
real<lower=0> sigmah;
real<lower=0> sigmapi;
real<lower=0> sigmaph;
real<lower=0> sigmaih;
real<lower=0> sigmae;
real u;
vector[np] P;
vector[ni] I;
vector[nh] H;
matrix[np,ni] PI;
matrix[np,nh] PH;
matrix[ni,nh] IH;
}
transformed parameters{
real sigmap2;
real sigmai2;
real sigmah2;
real sigmapi2;
real sigmaph2;
real sigmaih2;
real sigmae2;
sigmap2=sigmap^2;
sigmai2=sigmai^2;
sigmah2=sigmah^2;
sigmapi2=sigmapi^2;
sigmaph2=sigmaph^2;
sigmaih2=sigmaih^2;
sigmae2=sigmae^2;
}
model{
//define variable
real mu;
//likelihood
for (p in 1:np){
P[p]~normal(0,sigmap);
for (i in 1:ni){
PI[p,i]~normal(0,sigmapi);
}
for (h in 1:nh){
PH[p,h]~normal(0,sigmaph);
}
}
for (i in 1:ni){
I[i]~normal(0,sigmai);
}
for (h in 1:nh){
H[h]~normal(0,sigmah);
for (i in 1:ni){
IH[i,h]~normal(0,sigmaih);
}
}
for (p in 1:np){
for (i in 1:ni){
for (h in 1:nh){
mu=u+P[p]+I[i]+H[h]+PI[p,i]+PH[p,h]+IH[i,h];
x[p,i,h]~normal(mu,sigmae);
}
}
}
//prior
u~normal(0,sqrt(1000));
target+=inv_gamma_lpdf(sigmap2|2,16);
target+=inv_gamma_lpdf(sigmai2|2,4);
target+=inv_gamma_lpdf(sigmah2|2,1);
target+=inv_gamma_lpdf(sigmapi2|2,64);
target+=inv_gamma_lpdf(sigmaph2|2,2);
target+=inv_gamma_lpdf(sigmaih2|2,3);
target+=inv_gamma_lpdf(sigmae2|2,144);
}'
require(rstan)
require(reshape)
library(ggplot2)
mcmc.fit<-stan(model_code=mcmc.model,data=gtdata,chains=3,iter=10000,warmup=5000,algorithm="NUTS",
               pars=c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"))
setwd("C:/Users/Administrator/Desktop/t/plot/")
jpeg(filename=paste("trace_",k,".jpeg",sep = ""));stan_trace(mcmc.fit,pars=c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"));dev.off()
jpeg(filename=paste("ac_",k,".jpeg",sep = ""));stan_ac(mcmc.fit,pars=c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"));dev.off()
jpeg(filename=paste("hist_",k,".jpeg",sep = ""));stan_hist(mcmc.fit,pars=c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"),bins=30);dev.off()
##二元散射矩阵图
jpeg(filename=paste("pairs_",k,".jpeg",sep = ""));pairs(mcmc.fit,pars = c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"));dev.off()
result<-monitor(mcmc.fit,probs=c(0.05,0.95),print=F)
inmc.vc[k,]<-result[1:7,1]
inmc.se[k,]<-result[1:7,3]
inmc.ci50[k,]<-result[1:7,4]
inmc.ci950[k,]<-result[1:7,5]
inmc.cover[k,]<-ifelse(inmc.ci50[k,]<var&var<inmc.ci950[k,],1,0)
}
timeend <- Sys.time()
runningtime <- timeend-timestart
sink("C:/Users/Administrator/Desktop/s/output.txt",split=T)
colMeans(inmc.vc)
colMeans(inmc.se)
colMeans(inmc.ci50)
colMeans(inmc.ci950)
colMeans(inmc.cover)
print(runningtime)
sink()
write.csv(cbind(inmc.vc),file="C:/Users/Administrator/Desktop/s/vc.csv",row.names = F)
write.csv(cbind(inmc.se),file="C:/Users/Administrator/Desktop/s/se.csv",row.names = F)
write.csv(cbind(inmc.ci50),file="C:/Users/Administrator/Desktop/s/ci50.csv",row.names = F)
write.csv(cbind(inmc.ci950),file="C:/Users/Administrator/Desktop/s/ci950.csv",row.names = F)
write.csv(cbind(inmc.cover),file="C:/Users/Administrator/Desktop/s/cover.csv",row.names = F)
write.csv(cbind(timestart,timeend,runningtime),file="C:/Users/Administrator/Desktop/s/time.csv",row.names = F)
summary(mcmc.fit,pars=c("sigmap2","sigmai2","sigmah2","sigmapi2","sigmaph2","sigmaih2","sigmae2"))
write.csv(cbind(summary(mcmc.fit,pars=c("sigmap2"))),file="C:/Users/Administrator/Desktop/s/summary/psummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmai2"))),file="C:/Users/Administrator/Desktop/s/summary/isummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmah2"))),file="C:/Users/Administrator/Desktop/s/summary/hsummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmapi2"))),file="C:/Users/Administrator/Desktop/s/summary/pisummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmaph2"))),file="C:/Users/Administrator/Desktop/s/summary/phsummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmaih2"))),file="C:/Users/Administrator/Desktop/s/summary/ihsummary.csv",row.names = F)
write.csv(cbind(summary(mcmc.fit,pars=c("sigmae2"))),file="C:/Users/Administrator/Desktop/s/summary/esummary.csv",row.names = F)

That is pretty much the slowest possible way to draw from that posterior distribution. The main thing you need to do in order to go faster is the eliminate loops that contribute to the posterior log-kernel. But your model was missing a bunch of Jacobian terms when you put priors on the variances (which does not make a lot of sense usually) so I declared those as parameters and made the standard deviations be transformed parameters. Like this:

data {
  int<lower=1> np;
  int<lower=1> ni;
  int<lower=1> nh;
  real x[np,ni,nh];
  real vc[7];
}
parameters{
  real<lower=0> sigmap2;
  real<lower=0> sigmai2;
  real<lower=0> sigmah2;
  real<lower=0> sigmapi2;
  real<lower=0> sigmaph2;
  real<lower=0> sigmaih2;
  real<lower=0> sigmae2;
  real u;
  vector[np] P;
  vector[ni] I;
  vector[nh] H;
  matrix[np,ni] PI;
  matrix[np,nh] PH;
  matrix[ni,nh] IH;
}
transformed parameters{
  real sigmap = sqrt(sigmap);
  real sigmai = sqrt(sigmai);
  real sigmah = sqrt(sigmah);
  real sigmapi = sqrt(sigmapi);
  real sigmaph = sqrt(sigmaph);
  real sigmaih = sqrt(sigmaih);
  real sigmae = sqrt(sigmae);
}
model{
  P ~ normal(0, sigmap);
  to_vector(PI) ~ normal(0, sigmapi);
  to_vector(PH) ~ normal(0, sigmaph);
  I ~ normal(0, sigmai);
  H ~ normal(0, sigmah);
  to_vector(IH) ~ normal(0, sigmaih);

  for (p in 1:np){
    matrix[ni, nh] mu;
    for (i in 1:ni){
      for (h in 1:nh){
        mu[i,h] = u + P[p] + I[i] + H[h] + PI[p,i] + PH[p,h] + IH[i,h];
      }
    }
   to_vector(x[p]) ~ normal(to_vector(mu), sigmae);
  }

  // prior
  u~normal(0, sqrt(1000));
  target += inv_gamma_lpdf(sigmap2| 2, 16);
  target += inv_gamma_lpdf(sigmai2| 2, 4);
  target += inv_gamma_lpdf(sigmah2| 2, 1);
  target += inv_gamma_lpdf(sigmapi2| 2, 64);
  target += inv_gamma_lpdf(sigmaph2| 2, 2);
  target += inv_gamma_lpdf(sigmaih2| 2, 3);
  target += inv_gamma_lpdf(sigmae2| 2, 144);
}

But you will probably need to use a non-centered parameterization of the conditional mean and some other stuff. It looks as if you are familiar with BUGS, in which case you should spend time learning the Stan approach, reading appendix B in the Stan Manual, and the FAQ.

1 Like

Thanks for your advice!
But the loop times are 1000, because I want to produce 1000 batches of data, which is a necessary part. If it is the mian reason, leading to running slowly, I would only wait for the result and can’t delete it.

Sure, but this is in the R code. Ben’s Stan code should be fine. I repeat what Ben said: