I’m trying to speed up a stan code, and the largest loop seems like a good place to start. In the below code (which is a part of a larger code posted afterwards), the number of Reports
are around 70,000, meaning the expression mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1])
must be calculated many times. For clarity, the array mu
(approximately of length 4,000) is a function of parameters, phi
is a parameter and KretsNrSeqID
, A
and logrelA
are data.
Two questions:
-
Could this likely be sped up by vectorizing the calculation of
mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1])
outside the loop? -
There are different versions of the model, hence the
includealpha
andincludephi
variables, which are given as data input, and the code is set up to do exact LOO for observations where the PSIS-LOO approximation fails, hence theleftout
variable. The way the code is written suggests the if statements are evaluated every iteration of the loop. Is there any benefit to rewriting the code to possibly avoid checking each if-statements 70,000 times?
for (r in 1:Reports){
if (r!=leftout){
if (includealpha ){
if (includephi){
K[r] ~ neg_binomial_2(mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]) ,alpha[KretsNrSeqID[r]]);
}else{
K[r] ~ neg_binomial_2(mu[KretsNrSeqID[r]] * A[r] ,alpha[KretsNrSeqID[r]]);
}
} else {
if (includephi){
K[r] ~ poisson(mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]));
}else{
K[r] ~ poisson(mu[KretsNrSeqID[r]] * A[r]);
}
}
}
}
Here is the code in its entirety
data {
int<lower=1> NrOfYears;
int<lower=1> NrOfLans;
int<lower=1> NrOfKretsar;
// int<lower=0> NrOfCovs;
int<lower=1> Reports;
int<lower=0> leftout;
int<lower=0> K[Reports];
real<lower=0> A[Reports];
real logrelA[Reports];
int<lower=1> KretsNrSeqID[Reports];
int<lower=1> LansSeqID[NrOfLans];
int<lower=1> LanSeqID[Reports];
int<lower=1> KretsarSeqID[NrOfKretsar];
int<lower=1> KretsListLanSeqID[NrOfKretsar];
int<lower=1> KretsYearSeq[NrOfKretsar];
int<lower=0, upper=1> includealpha;//Boolean declaration
// int<lower=0, upper=1> usecov;//Boolean declaration
int<lower=0, upper=1> includegamma;//
int<lower=0, upper=1> includephi;//
int<lower=0, upper=1> AnalyzeWAICandLOO;
int<lower=0, upper=1> doexactloo;
// matrix[NrOfKretsar,NrOfCovs] COVMat;
int<lower=0, upper=1> FirstKretsAppearance[NrOfKretsar];
int<lower=0> FwdConnectionSeqID[NrOfKretsar,14];
}
parameters {
real lambda_raw[NrOfLans,NrOfYears];//County effect on average bagging rate
real chi_raw[NrOfKretsar];//for reparimeterization of chi
real omega1; //Nationwide effect on logmu year 1
real upsilon[includealpha ? 1 : 0];//Nationwide effect on alpha
real phi[includephi ? 1 : 0];//effect of hunting area on er area bagging rate
real<lower=0> sigma;//standard deviation of county effects on average bagging rate
real<lower=0> tao;//standard deviation of county level effects on average bagging rate
real gamma[includegamma ? 1 : 0];////effect of bagging intensity on alpha
//vector[NrOfCovs] beta;
real<lower=0> Somega;//Standard deviation of between year change of W
real omegachange_raw[NrOfYears-1]; // raw between year change in nationwide effect on logm
real<lower=-1,upper=1> rholambda; //correlation, county effects
real<lower=-1,upper=1> rhochi;//correlation, HMP effects
}
transformed parameters {
real omega[NrOfYears]; //Nationwide effect on average bagging rate
real<lower=0> alpha[includealpha ? NrOfKretsar : 0];// Shape parameter of the Gamma-Poisson
real logeffect[NrOfKretsar];
real<lower=0> mu[NrOfKretsar]; //Mean bagging rate in HMP
real chi[NrOfKretsar]; //County level effect on average bagging rate
real lambda[NrOfLans,NrOfYears]; //County level effect on average bagging rate
// vector[usecov ? NrOfKretsar : 0] B;
real omegachange[NrOfYears-1]; //between year change in nationwide effect on logm
omega[1]=omega1;
for (iy in 2:NrOfYears){
omegachange[iy-1]=omegachange_raw[iy-1] * Somega;//
omega[iy] =omega[iy-1]+omegachange[iy-1];
}
// if (usecov){
//
// B=COVMat*beta;
//
// }
for (ik in 1:NrOfKretsar){
chi[ik]=chi_raw[ik] * sigma;// reparametrisation of chi for improved computation
}
for (il in 1:NrOfLans){
for (iy in 1:NrOfYears){
lambda[il,iy]=lambda_raw[il,iy] * tao;// reparametrisation of lambda for improved computation
}
}
for (ik in 1:NrOfKretsar){
logeffect[ik]=omega[KretsYearSeq[ik]] + lambda[KretsListLanSeqID[ik],KretsYearSeq[ik]] + chi[ik] ;
// if (usecov){
//
// logeffect[ik]+=B[ik];
//
// }
mu[ik] = exp(logeffect[ik]);
if (includealpha ){
if (includegamma) {
alpha[ik] = exp(upsilon[1] + gamma[1]*(lambda[KretsListLanSeqID[ik],KretsYearSeq[ik]] + chi[ik]));
}else{
alpha[ik] = exp(upsilon[1]);
}
}
}
}
model{
for (r in 1:Reports){
if (r!=leftout){
// Number of killed animals in report r is negative binomial distributed,
// but defined from the mean (m[KretsNrSeqID[r]] * A[r]) and shape (c[Lan[r]]) of the gamma distribution of the equivalent gamma-poisson mixture
if (includealpha ){
if (includephi){
K[r] ~ neg_binomial_2(mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]) ,alpha[KretsNrSeqID[r]]);
}else{
K[r] ~ neg_binomial_2(mu[KretsNrSeqID[r]] * A[r] ,alpha[KretsNrSeqID[r]]);
}
} else {
if (includephi){
K[r] ~ poisson(mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]));
}else{
K[r] ~ poisson(mu[KretsNrSeqID[r]] * A[r]);
}
}
}
}
for(ik in 1:NrOfKretsar){
if (FirstKretsAppearance[ik]){
chi_raw[ik] ~ normal(0,1);// reparametrisation for improved computation
}else{
chi_raw[ik] ~ normal(rhochi*chi_raw[FwdConnectionSeqID[ik,1]],sqrt((1-rhochi^2)));
}
}
for (il in 1:NrOfLans){
lambda_raw[il,1] ~ normal(0,1);
for (iy in 2:NrOfYears){
lambda_raw[il,iy] ~ normal(rholambda*lambda_raw[il,iy-1],sqrt((1-rholambda^2)));
}
}
if (includealpha){
upsilon[1] ~ normal(0,3.0);//based on 95% coef var between 0.1 and 10
}
if (includegamma){
gamma[1] ~ normal(0,1); //Based on being ~95% sure that shap parameter changes less than four times as high or a quarter as low alpha with twice the intensity. Same as CoV doubling or halfing.
}
if (includephi){
phi[1] ~ normal(0,0.5);// based on 95% between -0.585 and 0.585, which corresponds to maximum 300% increas in hunting rate with twice the area
}
omega1 ~ normal(-8.7,4.3);
for (iy in 2:NrOfYears){
omegachange_raw[iy-1]~normal(0,1);
}
Somega ~ cauchy(0,2.5);
tao ~ lognormal(0.20,2.3);
sigma ~ lognormal(-1.3,0.86);
}
generated quantities {
vector[doexactloo ? 1 : 0] log_lik_exact;
vector[AnalyzeWAICandLOO ? Reports : 0] log_lik; // can't be computed if doexactloo
if (doexactloo){
if (includealpha){
if (includephi){
log_lik_exact[1] = neg_binomial_2_lpmf(K[leftout] | mu[KretsNrSeqID[leftout]] * A[leftout] * exp(logrelA[leftout] * phi[1]), alpha[KretsNrSeqID[leftout]]);
}else{
log_lik_exact[1] = neg_binomial_2_lpmf(K[leftout] | mu[KretsNrSeqID[leftout]] * A[leftout], alpha[KretsNrSeqID[leftout]]);
}
} else {
if (includephi){
log_lik_exact[1] = poisson_lpmf(K[leftout] | mu[KretsNrSeqID[leftout]] * A[leftout] * exp(logrelA[leftout] * phi[1]));
}else{
log_lik_exact[1] = poisson_lpmf(K[leftout] | mu[KretsNrSeqID[leftout]] * A[leftout]);
}
}
}
if (AnalyzeWAICandLOO){
for (r in 1:Reports){
if (includealpha){
if (includephi){
log_lik[r] = neg_binomial_2_lpmf(K[r] | mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]), alpha[KretsNrSeqID[r]]);
}else{
log_lik[r] = neg_binomial_2_lpmf(K[r] | mu[KretsNrSeqID[r]] * A[r], alpha[KretsNrSeqID[r]]);
}
} else {
if (includephi){
log_lik[r] = poisson_lpmf(K[r] | mu[KretsNrSeqID[r]] * A[r] * exp(logrelA[r] * phi[1]));
}else{
log_lik[r] = poisson_lpmf(K[r] | mu[KretsNrSeqID[r]] * A[r]);
}
}
}
}
}