Ordered_probit is slow - Any way to speed up?

I’m attempting to fit a latent interaction model with ordered categorical indicators. Profiling has shown that the “ordered_probit” part of the model is slowing things down. Is there anything I can do to speed up my code?

Stan code below (intended to be saved as “cat_v1.stan”), followed by an R script to generate the data.

data {

  int<lower = 1> N;
  int<lower = 1> xK;
  int<lower = 1> zK;
  int<lower = 1> yK;

  array[N, xK] int x;
  array[N, zK] int z;
  array[N, yK] int y;
  
}

transformed data {

  vector[2] zero_vec;
  zero_vec = rep_vector(0, 2);

  array[N] vector[2] zero_arr;
  zero_arr = rep_array(zero_vec, N);
  
}

parameters {
  
  // underlying continuous variables
  array[xK] vector[N] xstar;
  array[zK] vector[N] zstar;
  array[yK] vector[N] ystar;
  
  // loadings
  vector<lower=0, upper=1>[xK] xLambda;
  vector<lower=0, upper=1>[zK] zLambda;
  vector<lower=0, upper=1>[yK] yLambda;
  
  // thresholds
  array[xK] ordered[3] xc;
  array[zK] ordered[3] zc;
  array[yK] ordered[3] yc;

  // structural parameters
  vector[3] gamma;
  real eta_int;
  
  // latent variables
  vector[N] eta;
  array[N] vector[2] xi;
  cholesky_factor_corr[2] sigma_L; // for xi

}

transformed parameters {
  
  // "transpose" xi so can do vectorized operations
  array[2] vector[N] txi;

  for (j in 1:2) {
    for (i in 1:N) {
      txi[j][i] = xi[i][j];
    }
  }

  // all of the mu's
  vector[N] eta_mu;
  array[3] vector[N] x_mu;
  array[3] vector[N] z_mu;
  array[3] vector[N] y_mu;
  
  // indicator residual SDs
  vector<lower=0, upper=1>[xK] x_sd;
  vector<lower=0, upper=1>[zK] z_sd;
  vector<lower=0, upper=1>[yK] y_sd;

  // compute mu's and residual SDs
  for(k in 1:3) {
    x_mu[k] = xLambda[k]*txi[1];
    z_mu[k] = zLambda[k]*txi[2];
    y_mu[k] = yLambda[k]*eta;
    
    x_sd[k] = sqrt(1 - xLambda[k]^2);
    z_sd[k] = sqrt(1 - zLambda[k]^2);
    y_sd[k] = sqrt(1 - yLambda[k]^2);
  }
  
  eta_mu = 
    eta_int + 
    gamma[1]*txi[1] + 
    gamma[2]*txi[2] + 
    gamma[3]*rows_dot_product(txi[1], txi[2])
  ;
  
  real eta_sd;
  eta_sd = sqrt(1 - variance(eta_mu));

}

model {

  // priors
  gamma ~ normal(0.5, 1);
  xLambda ~ beta(2,2);
  zLambda ~ beta(2,2);
  yLambda ~ beta(2,2);
  eta_int ~ normal(0, 1);
  sigma_L ~ lkj_corr_cholesky(2);

  for(k in 1:3) {

    // profile("stars"){
    target += normal_lpdf(xstar[k] | x_mu[k], x_sd[k]);
    target += normal_lpdf(zstar[k] | z_mu[k], z_sd[k]);
    target += normal_lpdf(ystar[k] | y_mu[k], y_sd[k]);
    // }
    
    // profile("indicators"){
    target += ordered_probit_lpmf(x[,k] | xstar[k], xc[k]);
    target += ordered_probit_lpmf(z[,k] | zstar[k], zc[k]);
    target += ordered_probit_lpmf(y[,k] | ystar[k], yc[k]);
    // }
    
  }

  // profile("struc"){ 
  target += multi_normal_cholesky_lpdf(xi | zero_arr, sigma_L);
  target += normal_lpdf(eta | eta_mu, eta_sd);
  // }
  
}

R code:

library(tidyverse)
library(mvtnorm)
library(Matrix)
library(rstan)

set.seed(12479)
k <- 5
n <- 500
rho <- .3

sigma <- rbind(
  c(1, rho),
  c(rho, 1)
)

xi <- rmvnorm(n, sigma = sigma)

eta <- .4*xi[,1] + .4*xi[,2] + .2*xi[,1]*xi[,2] +
  rnorm(n, sd = sqrt(1 - .2616776))

lat <- cbind(xi, eta) %>% Matrix()

lambda <- rbind(
  c(.7, .7, .7, 0, 0, 0, 0, 0, 0),
  c(0, 0, 0, .7, .7, .7, 0, 0, 0),
  c(0, 0, 0, 0, 0, 0, .7, .7, .7)
) %>% Matrix(sparse = TRUE)

# residual SD implied by the loadings (all nonzero loadings are .7,
# so each indicator's residual SD is sqrt(1 - .7^2))
e_sd <- sqrt(1 - .7^2)

x <- lat %*% lambda %>%
  as.matrix()

e <- x
e[] <- rnorm(length(x), sd = e_sd)

x2 <- x + e
# discretise the noisy indicators into 4 ordered categories (integer codes 1 to 4)
x2[] <- as.integer(cut(x2, breaks = c(-Inf, -.65, 0, .65, Inf)))

data <- list(
  N = n,
  xK = 3L,
  zK = 3L,
  yK = 3L,
  x = x2[, 1:3],
  z = x2[, 4:6],
  y = x2[, 7:9]
)

huh <- stan_model(file = "cat_v1.stan")
idk <- sampling(huh, data = data, cores = 4, iter = 500)

Apologies for not having time… Things to look into:

  • use the respective _glm function (I believe only ordered_logistic_glm_lpmf exists, so that would mean switching to a logit link)
  • start to use reduce_sum-based parallelisation (a sketch of what the partial-sum function could look like is at the end of this post)

As a way to get started with this I’d recommend using brms to create a similar model, e.g. an ordinal model with family = cumulative("probit"), and looking at the Stan code it generates (make_stancode()). That way you get an idea of how to code this.
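For the reduce_sum route, here is one possible sketch of moving the ordered_probit terms into a partial-sum function, assuming a Stan version new enough to have reduce_sum. The function name partial_probit and the grainsize of 1 are placeholders of my own, not something defined in the model above, and threading has to be enabled at compile time (e.g. STAN_THREADS) for it to actually run in parallel:

functions {

  // log-likelihood for a slice of one indicator's observations;
  // y_slice is the argument reduce_sum slices over, while start and end
  // index into the full-length latent vector so the matching slice can be taken
  real partial_probit(array[] int y_slice, int start, int end,
                      vector ystar, vector c) {
    return ordered_probit_lpmf(y_slice | ystar[start:end], c);
  }

}

and then in the model block, replacing the direct ordered_probit_lpmf calls:

  for (k in 1:3) {
    // grainsize of 1 lets the scheduler pick chunk sizes; tune this on real data
    target += reduce_sum(partial_probit, x[, k], 1, xstar[k], xc[k]);
    target += reduce_sum(partial_probit, z[, k], 1, zstar[k], zc[k]);
    target += reduce_sum(partial_probit, y[, k], 1, ystar[k], yc[k]);
  }

Since every observation also has its own xstar/zstar/ystar parameter, the bigger gain usually comes from pushing as much of the per-observation work as possible, including the normal_lpdf terms, into the partial-sum function rather than parallelising the ordered_probit terms alone.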