# Making Stan efficient

I am trying to fit a multivariate linear mixed model (MLMM) for the analysis of multiple responses:

$$
\underbrace{\mathbf{y}}_{qn\times 1} = \mathbf{X}\boldsymbol{\beta} + \underbrace{\mathbf{Z}}_{qn\times qm}\underbrace{\boldsymbol{\gamma}}_{qm\times 1} + \boldsymbol{\varepsilon}, \qquad \boldsymbol{\gamma}\sim \mathcal{N}(\mathbf{0},\, I_m\otimes\mathbf{G}), \qquad \boldsymbol{\varepsilon}\sim \mathcal{N}(\mathbf{0},\, I_n\otimes\mathbf{R})
$$

My Stan program is too slow. Stan reports: "Gradient evaluation took 0.136 seconds. 1000 transitions using 10 leapfrog steps per transition would take 1360 seconds." That is too long. How can I improve it? My R code and Stan code are below.

library(MASS)  # for mvrnorm

q <- 3        # number of responses
m <- 20       # number of random-effect groups
cs <- 12      # cluster size
n <- cs * m   # number of students

### Design matrix for the fixed part of y:
### a constant of 50 and, for simplicity, a randomly drawn
### covariate from a standard normal distribution.
intercept <- 50
cont_var <- rnorm(q * n, 0, 1)

X <- as.matrix(cbind(intercept, cont_var))
beta <- c(1, 1)
fixed_outcome <- X %*% beta

G_j <- matrix(c(1, .51, .34, .51, 1, .26, .34, .26, 1), ncol = 3)
# to scale to a covariance (VCV) matrix:
# diag_mat_tau <- 0.25 * diag(3); diag_mat_tau %*% G_j %*% diag_mat_tau
G <- kronecker(diag(m), G_j)
random_eff <- mvrnorm(1, rep(0, q * m), G)
b <- as.matrix(random_eff)

Z <- matrix(0, q * n, q * m)
vec <- numeric(q * n)
vec2 <- numeric(q * n)
j <- 1
while (j <= q * n - 1) {
  # fill Z two rows at a time with randomly sampled random-effect columns
  ind <- sample(q * m, 1)
  ind2 <- sample(q * m, 1)
  Z[j, ind] <- 1
  Z[j + 1, ind2] <- 1
  Z[j + 1, ind] <- 0
  vec[j] <- ind
  vec2[j] <- ind2
  j <- j + 2
}

random_outcome <- Z %*% b

## Error part
R_i <- matrix(c(1.0, 0.7, 0.49, 0.7, 1.0, 0.7, 0.49, 0.7, 1.0), ncol = 3)
R <- kronecker(diag(n), R_i)
epsilons <- mvrnorm(1, mu = rep(0, q * n), Sigma = R)

y <- fixed_outcome + random_outcome + epsilons



functions {
  // Build a block-diagonal matrix with n copies of qq on the diagonal.
  matrix block_diag(matrix qq, int n) {
    int sz = rows(qq);
    int Q = sz * n;
    matrix[Q, Q] G = rep_matrix(0, Q, Q);
    int r = 1;

    while (r < Q) {
      G[r:(r + sz - 1), r:(r + sz - 1)] = qq;
      r += sz;
    }

    return G;
  }
}

data {
  int alpha_low;        // alpha lower boundary value
  int alpha_up;         // alpha upper boundary value
  int<lower=1> n;       // number of students
  int<lower=1> q;       // number of responses
  int<lower=1> m;       // number of random effects
  int<lower=1> qm;
  int<lower=1> qn;
  int<lower=1> P;       // number of parameters
  matrix[qn, P] X;      // fixed-effects design matrix
  vector[qn] y;         // response variable
  int vec[qn];
  int vec2[qn];
}

parameters {
  vector[P] beta;
  vector[qm] gamma;
  corr_matrix[q] Omega_G;
  corr_matrix[q] Omega_R;
  vector<lower=0>[q] sigma_G;
  vector<lower=0>[q] sigma_R;
  real<lower=alpha_low - 0.01, upper=alpha_up + 0.01> alpha;
}

transformed parameters {
  matrix[q, q] G_j = quad_form_diag(Omega_G, sigma_G);  // VCV matrix
  matrix[q, q] R_i = quad_form_diag(Omega_R, sigma_R);  // VCV matrix
  matrix[qm, qm] G = block_diag(G_j, m);
  matrix[qn, qn] R = block_diag(R_i, n);
  matrix[qn, qm] Z = rep_matrix(0, qn, qm);  // subject ranef design matrix
  for (j in 1:(qn / 2)) {  // fill Z two rows at a time, matching the R simulation
    Z[2 * j - 1, vec[2 * j - 1]] = 1;
    Z[2 * j, vec2[2 * j - 1]] = 1;
    Z[2 * j, vec[2 * j - 1]] = alpha;
  }
}

model {
  beta ~ normal(0, 10);
  sigma_G ~ student_t(4, 0, 1);
  sigma_R ~ student_t(4, 0, 1);
  Omega_G ~ lkj_corr(1.0);
  Omega_R ~ lkj_corr(2.0);
  gamma ~ multi_normal(rep_vector(0, qm), G);
  y ~ multi_normal(X * beta + Z * gamma, R);
}



Sampling with Cholesky-decomposed matrices is much faster. For the multi_normal_cholesky function, see 23.3 Multivariate normal distribution, Cholesky parameterization | Stan Functions Reference for more details.
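As a minimal sketch of that parameterization (q, mu, and the data y here are placeholders, not from the model above):

parameters {
  cholesky_factor_corr[q] L_Omega;  // Cholesky factor of a q x q correlation matrix
  vector<lower=0>[q] sigma;         // per-response scales
}
model {
  // Cholesky factor of Sigma = diag(sigma) * Omega * diag(sigma)
  matrix[q, q] L_Sigma = diag_pre_multiply(sigma, L_Omega);
  L_Omega ~ lkj_corr_cholesky(2);
  sigma ~ student_t(4, 0, 1);
  y ~ multi_normal_cholesky(mu, L_Sigma);  // no decomposition needed at runtime
}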
Following is a Stan version of the Kronecker product of Cholesky-decomposed matrices.

vector chol_kronecker_product(matrix LA, matrix LG, vector a) {
  vector[rows(LA) * cols(LG)] new_a = rep_vector(0, rows(LA) * cols(LG));
  for (iA in 1:cols(LA)) {
    for (jA in 1:iA) {
      if (LA[iA, jA] > 1e-10) { // avoid calculating products between unrelated individuals
        for (iG in 1:cols(LG)) {
          for (jG in 1:iG) {
            new_a[(cols(LG) * (iA - 1)) + iG] = new_a[(cols(LG) * (iA - 1)) + iG]
                + LA[iA, jA] * LG[iG, jG] * a[(cols(LG) * (jA - 1)) + jG];
          }
        }
      }
    }
  }
  return new_a;
}


Thanks. I think you mean that Stan computing the Kronecker product on vectors is faster than on matrices. However, I still do not understand how to use the function you provided.

@andre.pfeuffer I don't understand this either. If I remove a (assuming that's like sigma) and expose this in R and compare to

model_code <- "
functions {
  vector chol_kronecker_product(matrix LA, matrix LG) {
    vector[rows(LA) * cols(LG)] new_a = rep_vector(0, rows(LA) * cols(LG));
    for (iA in 1:cols(LA))
      for (jA in 1:iA)
        if (LA[iA, jA] > 1e-10) // avoid calculating products between unrelated individuals
          for (iG in 1:cols(LG))
            for (jG in 1:iG)
              new_a[(cols(LG) * (iA - 1)) + iG] = new_a[(cols(LG) * (iA - 1)) + iG] + LA[iA, jA] * LG[iG, jG];

    return new_a;
  }
}"
library(rstan)
library(rethinking)

A <- rlkjcorr(1, K = 3, eta = 2)
B <- rlkjcorr(1, K = 2, eta = 2)

expose_stan_functions(stanc(model_code = model_code))

t(chol(kronecker(A, B)))
chol_kronecker_product(chol(A), chol(B))


I get out

> t(chol(kronecker(A, B)))
            [,1]       [,2]       [,3]          [,4]       [,5]      [,6]
[1,]  1.00000000 0.00000000  0.0000000  0.000000e+00  0.0000000 0.0000000
[2,] -0.46681303 0.88435604  0.0000000  0.000000e+00  0.0000000 0.0000000
[3,]  0.10072263 0.00000000  0.9949145  0.000000e+00  0.0000000 0.0000000
[4,] -0.04701864 0.08907467 -0.4644391  8.798587e-01  0.0000000 0.0000000
[5,]  0.45090687 0.00000000 -0.6577363  6.309099e-17  0.6033788 0.0000000
[6,] -0.21048920 0.39876221  0.3070399 -5.816731e-01 -0.2816651 0.5336017
> chol_kronecker_product(chol(A), chol(B))
[1] 1.0000000 0.8843560 0.9949145 0.8798587 0.6033788 0.5336017


where only the diagonal is returned, but what about all the lower elements?


I used the above code from the Multivariate animal model topic. That topic has advanced recently; there are faster and better ways to calculate the Kronecker product.

@jenvent I don't mean that the product is faster on vectors than on matrices. I mean that sampling in the Cholesky form is much faster, and therefore we need the Kronecker product based upon Cholesky-decomposed matrices.
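To make the intended use concrete, here is a minimal non-centered sketch (the names a_std and L_A are my own illustrations, not from the original post; L_A is the Cholesky factor of the left Kronecker factor, here simply I_m, which is its own Cholesky factor):

transformed data {
  matrix[m, m] L_A = diag_matrix(rep_vector(1.0, m));  // I_m is its own Cholesky factor
}
parameters {
  vector[qm] a_std;                // unit-normal innovations
  cholesky_factor_corr[q] corr_G;  // Cholesky factor of the q x q correlation
  vector<lower=0>[q] sigma_G;      // per-response scales
}
transformed parameters {
  // gamma ~ N(0, I_m (x) G_j) with G_j = diag(sigma_G) * Omega_G * diag(sigma_G)
  vector[qm] gamma
      = chol_kronecker_product(L_A, diag_pre_multiply(sigma_G, corr_G), a_std);
}
model {
  a_std ~ std_normal();
  // ... priors on corr_G and sigma_G, plus the likelihood
}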

If I'm reading your model right, you have your outcome constructed as a single large vector, with the outcome vector for every individual appended. Additionally, you have the covariance matrices for every individual packed into a single large block-diagonal matrix, is that correct?

If so, it is going to be markedly more efficient to work with an array of outcome vectors; see this section of the User's Guide for more info: 1.15 Multivariate outcomes | Stan User's Guide
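A minimal sketch of that formulation (assuming one q-vector of responses per student and a shared q x q residual factor; with rstan, the older syntax `vector[q] y[n];` replaces `array[n] vector[q] y;`):

data {
  int<lower=1> n;           // number of students
  int<lower=1> q;           // number of responses per student
  int<lower=1> P;           // number of fixed-effect parameters
  array[n] matrix[q, P] X;  // per-student design matrices
  array[n] vector[q] y;     // one outcome vector per student
}
parameters {
  vector[P] beta;
  cholesky_factor_corr[q] corr_R;
  vector<lower=0>[q] sigma_R;
}
model {
  matrix[q, q] L_R = diag_pre_multiply(sigma_R, corr_R);
  array[n] vector[q] mu;
  for (i in 1:n)
    mu[i] = X[i] * beta;
  beta ~ normal(0, 10);
  corr_R ~ lkj_corr_cholesky(2);
  sigma_R ~ student_t(4, 0, 1);
  y ~ multi_normal_cholesky(mu, L_R);  // vectorized: q x q factor instead of qn x qn
}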


I think this does the Kronecker Cholesky. There's some unnecessary multiplication because of the 0s in the upper triangle, but it only occurs in the sub-matrix B. The output matrix C is built lower-triangular block-wise, so the loop never enters the upper-triangular blocks of C.

matrix chol_kronecker_prod(matrix A, matrix B) {
  int m = rows(A);
  int p = rows(B);
  int N = m * p;
  matrix[N, N] C = rep_matrix(0., N, N);

  for (i in 1:m) {
    for (j in 1:i) {
      if (fabs(A[i, j]) > 1e-12) {
        int row_start = (i - 1) * p + 1;
        int row_end = (i - 1) * p + p;
        int col_start = (j - 1) * p + 1;
        int col_end = (j - 1) * p + p;
        C[row_start:row_end, col_start:col_end] = A[i, j] * B;
      }
    }
  }
  return C;
}
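Since the left factor in this thread is an identity matrix, note that I ⊗ L is block-diagonal, so a matrix-vector product with it never requires forming the full N × N matrix. A sketch of that shortcut (the helper name kron_identity_mult is mine, not from the thread):

// Computes (I_m (x) L) * a without building the (m * rows(L))-square matrix.
vector kron_identity_mult(matrix L, vector a, int m) {
  int q = rows(L);
  vector[m * q] out;
  for (i in 1:m)
    out[((i - 1) * q + 1):(i * q)] = L * segment(a, (i - 1) * q + 1, q);
  return out;
}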


Guy, I followed your advice; it cut the sampling time in half. I will need CmdStan to use the array option. Any suggestions to improve further?
Thanks
Stan code:
"
functions {
  matrix chol_kronecker_prod(matrix A, matrix B) {
    int m = rows(A);
    int p = rows(B);
    int N = m * p;
    matrix[N, N] C = rep_matrix(0., N, N);

    for (i in 1:m) {
      for (j in 1:i) {
        if (fabs(A[i, j]) > 1e-12) {
          int row_start = (i - 1) * p + 1;
          int row_end = (i - 1) * p + p;
          int col_start = (j - 1) * p + 1;
          int col_end = (j - 1) * p + p;
          C[row_start:row_end, col_start:col_end] = A[i, j] * B;
        }
      }
    }
    return C;
  }
}

data {
  int alpha_low;            // alpha lower boundary value
  int alpha_up;             // alpha upper boundary value
  int<lower=1> n;           // number of students
  int<lower=1> q;           // number of responses
  int<lower=1> m;           // number of random effects
  int<lower=1> qm;
  int<lower=1> qn;
  int<lower=1> P;           // number of parameters
  matrix[qn, P] X;          // fixed-effects design matrix
  vector[qn] y;             // response variable
  // array[n] vector[q] y;  // array form, with CmdStan
  int vec[qn];
  int vec2[qn];
}
transformed data { // identity_matrix() is available instead when using CmdStan
  vector[m] One_m = rep_vector(1.0, m);
  vector[n] One_n = rep_vector(1.0, n);
  matrix[m, m] I_m = diag_matrix(One_m);
  matrix[n, n] I_n = diag_matrix(One_n);
}
parameters {
  vector[P] beta;
  vector[qm] gamma;
  cholesky_factor_corr[q] corr_G;
  cholesky_factor_corr[q] corr_R;
  vector<lower=0>[q] sigma_G;
  vector<lower=0>[q] sigma_R;
  real<lower=alpha_low - 0.01, upper=alpha_up + 0.01> alpha;
}

transformed parameters {
  matrix[q, q] Omega_G;  // full correlation matrices, for reporting
  matrix[q, q] Omega_R;
  matrix[q, q] G_j;      // Cholesky factor of the q x q VCV matrix
  matrix[q, q] R_i;      // Cholesky factor of the q x q VCV matrix
  matrix[qm, qm] G;
  matrix[qn, qn] R;
  matrix[qn, qm] Z = rep_matrix(0, qn, qm);  // subject ranef design matrix
  Omega_G = multiply_lower_tri_self_transpose(corr_G);  // Lcorr * Lcorr'
  Omega_R = multiply_lower_tri_self_transpose(corr_R);
  G_j = diag_pre_multiply(sigma_G, corr_G);  // Cholesky factor of the covariance
  R_i = diag_pre_multiply(sigma_R, corr_R);
  G = chol_kronecker_prod(I_m, G_j);
  R = chol_kronecker_prod(I_n, R_i);
  for (j in 1:(qn / 2)) {  // fill Z two rows at a time, matching the R simulation
    Z[2 * j - 1, vec[2 * j - 1]] = 1;
    Z[2 * j, vec2[2 * j - 1]] = 1;
    Z[2 * j, vec[2 * j - 1]] = alpha;
  }
}

model {
  beta ~ normal(0, 10);
  sigma_R ~ student_t(4, 0, 1);
  sigma_G ~ student_t(4, 0, 1);
  corr_R ~ lkj_corr_cholesky(2);
  corr_G ~ lkj_corr_cholesky(2);
  gamma ~ multi_normal_cholesky(rep_vector(0, qm), G);
  y ~ multi_normal_cholesky(X * beta + Z * gamma, R);
}

"