I’m moving this from a previous question (linked here) because I think it has evolved beyond the original problem. I have a hierarchical logit/categorical model with a large dataset. I’ve set it up to run on a GPU through my institution’s HPC.
I get a C++ assertion error from Eigen:
stan/lib/stan_math/lib/eigen_3.3.9/Eigen/src/Core/CommaInitializer.h:118: XprType& Eigen::CommaInitializer<MatrixType>::finished() [with XprType = Eigen::Matrix<double, 1, -1>]: Assertion ((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) && m_col == m_xpr.cols() && "Too few coefficients passed to comma initializer (operator<<)"' failed.
It seems to relate to the dataset size: it runs fine if I include only 100 records, but as I increase the dataset size (the full dataset is ~187,000 rows) it starts giving the above error. The HPC isn’t giving me any error that I can see. I’m requesting 4 cores and a Tesla V100 (with OPA and 16 GB of GPU memory), plus 16 GB of CPU RAM. It’s my first time working with a GPU, so I may be missing something in the setup.
Checking the GPU status while my code runs with 100 rows, it only uses 7% of the GPU, so I’m not sure why it wouldn’t run with more data.
R script:
# Compile the Stan program with OpenCL enabled so the likelihood can run
# on the GPU; "O1" switches on stanc's level-1 optimizations.
model.1 <- cmdstan_model(
  "mtc_equity_2.stan",
  cpp_options = list(stan_opencl = TRUE),
  stanc_options = list("O1")
)

# Work on a subset of the data while debugging the GPU setup.
samp_ct <- 100L
mod <- mod[seq_len(samp_ct), ]

# Bundle everything the Stan data block expects.
data_list <- list(
  N = samp_ct,                                   # number of observations
  ncat = 4L,                                     # outcome categories
  nvar = 2L,                                     # variables per alternative
  ntotvar = 5L,                                  # total variables
  Y = as.integer(mod$trptrans),                  # chosen alternative
  x1 = mod %>% select(travelTimeDrive, travelCostDrive),
  x2 = cbind(mod$travelTimeWalk, rep(0, samp_ct)),
  x3 = cbind(mod$travelTimeBike, rep(0, samp_ct)),
  x4 = cbind(mod$travelTimeTransit, rep(-1, samp_ct)),
  N_1 = 62L,                                     # grouping levels, ID 1
  M_1 = 6L,                                      # coefficients per level
  J_1 = as.integer(mod$personid),
  Z_1 = cbind(
    rep(1, samp_ct),
    mod %>% select(race_2, race_3, race_4, race_5, hhfaminc)
  ),
  NC_1 = 15L,
  N_2 = 5L,
  M_2 = 1L,
  J_2 = as.integer(mod$trippurp),
  prior_only = 0L
)

# Sample: 4 chains in parallel, 500 warmup + 500 sampling iterations each.
model_fit <- model.1$sample(
  data = data_list,
  seed = 24567,
  iter_warmup = 500,
  iter_sampling = 500,
  chains = 4,
  parallel_chains = 4
)
Stan code:
functions {
  /* Scale and correlate standardized group-level effects.
   * Args:
   *   z: matrix of unscaled (standard-normal) group-level effects
   *   SD: vector of standard deviation parameters, one per coefficient
   *   L: Cholesky factor of the correlation matrix
   * Returns:
   *   matrix of scaled group-level effects, transposed so that rows
   *   index group levels and columns index coefficients
   */
  matrix scale_r_cor(matrix z, vector SD, matrix L) {
    // the result is stored with the opposite dimension order to z
    return (diag_pre_multiply(SD, L) * z)';
  }
}
data {
int<lower=1> N; // total number of observations
int<lower=2> ncat; // number of categories
int<lower=2> nvar; // number of variables for each alternative
int<lower=2> ntotvar; // number of total variables
// response variable; bounded so out-of-range categories are rejected
// at data load instead of failing inside categorical_logit_glm
array[N] int<lower=1, upper=ncat> Y;
// covariate matrix for alt1
matrix[N,nvar] x1;
// covariate matrix for alt2
matrix[N,nvar] x2;
// covariate matrix for alt3
matrix[N,nvar] x3;
// covariate matrix for alt4
matrix[N,nvar] x4;
// data for group-level effects of ID 1
int<lower=1> N_1; // number of grouping levels
int<lower=1> M_1; // number of coefficients per level
// grouping indicator per observation; the upper bound guards against
// indexing r_* past N_1 when the full dataset holds more person ids
// than N_1 (modern `array[...]` syntax, consistent with Y above —
// the old `int J_1[N]` form is removed in Stan 2.33+)
array[N] int<lower=1, upper=N_1> J_1;
// group-level predictor values
matrix[N, M_1] Z_1;
int<lower=1> NC_1; // number of group-level correlations
// data for group-level effects of ID 2
int<lower=1> N_2; // number of grouping levels
int<lower=1> M_2; // number of coefficients per level
array[N] int<lower=1, upper=N_2> J_2; // grouping indicator per observation
int prior_only; // should the likelihood be ignored?
}
transformed data {
// Total number of group-level standard deviations: ncat*2 = 8 random-effect
// terms appear below (r_1..r_16 in pairs), each pair consuming M_1
// person-level SDs plus M_2 trip-purpose SDs.
// NOTE(review): the sd vector is later sliced as k*M_1 + k (transformed
// parameters block), which consumes exactly M_3 entries only when
// M_2 == 1 — confirm M_2 is always 1 for this model.
int<lower=1> M_3 = (M_1+M_2)*(ncat*2); // total number of sd
}
parameters {
// Population-level (fixed) effects: btc is the shared cost term,
// a2-a4 are alternative-specific intercepts, b1tt-b4tt are
// alternative-specific time terms (all later transformed in the model).
real b_btc; // population-level effects
real b_a2; // population-level effects
real b_a3; // population-level effects
real b_a4; // population-level effects
real b_b1tt; // population-level effects
real b_b2tt; // population-level effects
real b_b3tt; // population-level effects
real b_b4tt; // population-level effects
// NOTE(review): "sd" shadows the built-in sd() function; renaming it
// (e.g. sd_all) together with its uses downstream would be cleaner.
vector<lower=0>[M_3] sd; // group-level standard deviations
// Eight pairs of group-level effects: z_odd are M_1 correlated
// person-level coefficients with Cholesky factor L_odd; z_even are
// M_2 trip-purpose-level deviations. The deprecated `vector[N_2] z_k[M_2]`
// declarations are rewritten in modern `array[...]` syntax (the old form
// is removed in Stan 2.33+), with types unchanged.
matrix[M_1, N_1] z_1; // standardized group-level effects
cholesky_factor_corr[M_1] L_1; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_2; // standardized group-level effects
matrix[M_1, N_1] z_3; // standardized group-level effects
cholesky_factor_corr[M_1] L_3; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_4; // standardized group-level effects
matrix[M_1, N_1] z_5; // standardized group-level effects
cholesky_factor_corr[M_1] L_5; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_6; // standardized group-level effects
matrix[M_1, N_1] z_7; // standardized group-level effects
cholesky_factor_corr[M_1] L_7; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_8; // standardized group-level effects
matrix[M_1, N_1] z_9; // standardized group-level effects
cholesky_factor_corr[M_1] L_9; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_10; // standardized group-level effects
matrix[M_1, N_1] z_11; // standardized group-level effects
cholesky_factor_corr[M_1] L_11; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_12; // standardized group-level effects
matrix[M_1, N_1] z_13; // standardized group-level effects
cholesky_factor_corr[M_1] L_13; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_14; // standardized group-level effects
matrix[M_1, N_1] z_15; // standardized group-level effects
cholesky_factor_corr[M_1] L_15; // cholesky factor of correlation matrix
array[M_2] vector[N_2] z_16; // standardized group-level effects
}
transformed parameters {
// Group-level effects on the natural scale. Odd-numbered r_* are
// person-level matrices (N_1 levels x M_1 correlated coefficients);
// even-numbered r_* are trip-purpose-level scalar deviations (N_2 levels).
matrix[N_1, M_1] r_1; // actual group-level effects
vector[N_2] r_2; // actual group-level effects
matrix[N_1, M_1] r_3; // actual group-level effects
vector[N_2] r_4; // actual group-level effects
matrix[N_1, M_1] r_5; // actual group-level effects
vector[N_2] r_6; // actual group-level effects
matrix[N_1, M_1] r_7; // actual group-level effects
vector[N_2] r_8; // actual group-level effects
matrix[N_1, M_1] r_9; // actual group-level effects
vector[N_2] r_10; // actual group-level effects
matrix[N_1, M_1] r_11; // actual group-level effects
vector[N_2] r_12; // actual group-level effects
matrix[N_1, M_1] r_13; // actual group-level effects
vector[N_2] r_14; // actual group-level effects
matrix[N_1, M_1] r_15; // actual group-level effects
vector[N_2] r_16; // actual group-level effects
// compute actual group-level effects
// sd is partitioned sequentially: block k's correlated term consumes
// M_1 entries ((k-1)*M_1+k : k*M_1+k-1) and its scalar term takes the
// single following entry at k*M_1+k, ending at sd[8*M_1+8] == sd[M_3].
r_1 = scale_r_cor(z_1, sd[1:M_1], L_1);
r_2 = (sd[M_1+1] * (z_2[1]));
// compute actual group-level effects
r_3 = scale_r_cor(z_3, sd[M_1+2:2*M_1+1], L_3);
r_4 = (sd[2*M_1+2] * (z_4[1]));
// compute actual group-level effects
r_5 = scale_r_cor(z_5, sd[2*M_1+3:3*M_1+2], L_5);
r_6 = (sd[3*M_1+3] * (z_6[1]));
// compute actual group-level effects
r_7 = scale_r_cor(z_7, sd[3*M_1+4:4*M_1+3], L_7);
r_8 = (sd[4*M_1+4] * (z_8[1]));
// compute actual group-level effects
r_9 = scale_r_cor(z_9, sd[4*M_1+5:5*M_1+4], L_9);
r_10 = (sd[5*M_1+5] * (z_10[1]));
// compute actual group-level effects
r_11 = scale_r_cor(z_11, sd[5*M_1+6:6*M_1+5], L_11);
// FIX: was sd[5*M_1+6], which re-used the first entry of r_11's slice
// and left sd[6*M_1+6] unused; follow the k*M_1+k pattern above.
r_12 = (sd[6*M_1+6] * (z_12[1]));
// compute actual group-level effects
r_13 = scale_r_cor(z_13, sd[6*M_1+7:7*M_1+6], L_13);
// FIX: was sd[6*M_1+7], which re-used the first entry of r_13's slice
// and left sd[7*M_1+7] unused.
r_14 = (sd[7*M_1+7] * (z_14[1]));
// compute actual group-level effects
r_15 = scale_r_cor(z_15, sd[7*M_1+8:8*M_1+7], L_15);
r_16 = (sd[8*M_1+8] * (z_16[1]));
}
model {
// likelihood including constants
if (!prior_only) {
// Define matrices/vector for x, alpha, beta
matrix[ncat,nvar] x;
matrix[ncat,nvar] beta;
vector[ncat] alpha;
// Per-observation intercepts for the ncat alternatives; column 1 (the
// reference alternative) stays fixed at 0.
matrix[N,ncat] a = rep_matrix(0,N,ncat);
a[,2] = rep_vector(b_a2,N);
a[,3] = rep_vector(b_a3,N);
a[,4] = rep_vector(b_a4,N);
// initialize linear predictor term
// Terms are btc, b1tt, b2tt, b3tt, b4tt
// Column 1 starts at the population-level cost term b_btc; columns 2-5
// are overwritten with the alternative-specific time terms.
matrix[N,ntotvar] b = rep_matrix(b_btc,N,ntotvar);
b[,2] = rep_vector(b_b1tt,N);
b[,3] = rep_vector(b_b2tt,N);
b[,4] = rep_vector(b_b3tt,N);
b[,5] = rep_vector(b_b4tt,N);
for (n in 1:N) {
// add to linear predictor term
// Each bracketed element is a scalar: r_k[J_1[n]] (row_vector of length
// M_1) times Z_1[n]' (vector of length M_1) is a dot product, plus the
// purpose-level deviation r_{k+1}[J_2[n]].
// NOTE(review): r_* are indexed at J_1[n] / J_2[n]; values above
// N_1 / N_2 would read out of bounds — confirm the full 187k-row
// dataset really has at most N_1 = 62 person ids and N_2 = 5 purposes.
a[n] += [0,r_3[J_1[n]]*Z_1[n]' + r_4[J_2[n]], r_5[J_1[n]]*Z_1[n]' + r_6[J_2[n]], r_7[J_1[n]]*Z_1[n]' + r_8[J_2[n]]];
// add to linear predictor term
// Terms are btc, b1tt, b2tt, b3tt, b4tt
b[n] += [r_1[J_1[n]]*Z_1[n]' + r_2[J_2[n]], r_9[J_1[n]]*Z_1[n]' + r_10[J_2[n]], r_11[J_1[n]]*Z_1[n]' + r_12[J_2[n]],r_13[J_1[n]]*Z_1[n]' + r_14[J_2[n]],r_15[J_1[n]]*Z_1[n]' + r_16[J_2[n]]];
// exp() keeps the cost/time coefficients strictly positive (the
// hierarchical effects act on the log scale).
b[n] = exp(b[n]);
// Each x and beta is a matrix with dimensions alts x variables
// Our x will be the time/cost coming in as inputs
x[1,] = x1[n];
x[2,] = x2[n];
x[3,] = x3[n];
x[4,] = x4[n];
// Our betas will be the hierarchical slope parameters
// Row k is (cost_coef * time_coef_k, cost_coef): the time effect is
// scaled by the cost coefficient b[n,1].
beta[1,] = [b[n,1] * b[n,2], b[n,1]];
beta[2,] = [b[n,1] * b[n,3], b[n,1]];
beta[3,] = [b[n,1] * b[n,4], b[n,1]];
beta[4,] = [b[n,1] * b[n,5], b[n,1]];
// Our alphas will be the hierarchical intercept parameters
// Intercepts for alternatives 2-4 are likewise scaled by b[n,1];
// a[n,1] is 0 by construction, so alternative 1 is the reference.
alpha = [a[n,1], b[n,1] * a[n,2], b[n,1] * a[n,3], b[n,1] * a[n,4]]';
// One observation per call: x is ncat x nvar and beta' is nvar x ncat,
// as categorical_logit_glm expects.
target += categorical_logit_glm_lupmf(Y[n] | x, alpha, beta');
}
}
// priors including constants
target += normal_lpdf(b_btc | 0, 2.5);
target += normal_lpdf(b_a2 | 0, 2.5);
target += normal_lpdf(b_a3 | 0, 2.5);
target += normal_lpdf(b_a4 | 0, 2.5);
target += normal_lpdf(b_b1tt | 0, 2.5);
target += normal_lpdf(b_b2tt | 0, 2.5);
target += normal_lpdf(b_b3tt | 0, 2.5);
target += normal_lpdf(b_b4tt | 0, 2.5);
// NOTE(review): half-Student-t prior implied by the <lower=0> constraint
// on sd; the omitted truncation constant is fine for sampling but matters
// for bridge-sampling/marginal-likelihood work.
target += student_t_lpdf(sd | 3, 0, 2.5);
target += std_normal_lpdf(to_vector(z_1));
target += lkj_corr_cholesky_lpdf(L_1 | 1);
target += std_normal_lpdf(z_2[1]);
target += std_normal_lpdf(to_vector(z_3));
target += lkj_corr_cholesky_lpdf(L_3 | 1);
target += std_normal_lpdf(z_4[1]);
target += std_normal_lpdf(to_vector(z_5));
target += lkj_corr_cholesky_lpdf(L_5 | 1);
target += std_normal_lpdf(z_6[1]);
target += std_normal_lpdf(to_vector(z_7));
target += lkj_corr_cholesky_lpdf(L_7 | 1);
target += std_normal_lpdf(z_8[1]);
target += std_normal_lpdf(to_vector(z_9));
target += lkj_corr_cholesky_lpdf(L_9 | 1);
target += std_normal_lpdf(z_10[1]);
target += std_normal_lpdf(to_vector(z_11));
target += lkj_corr_cholesky_lpdf(L_11 | 1);
target += std_normal_lpdf(z_12[1]);
target += std_normal_lpdf(to_vector(z_13));
target += lkj_corr_cholesky_lpdf(L_13 | 1);
target += std_normal_lpdf(z_14[1]);
target += std_normal_lpdf(to_vector(z_15));
target += lkj_corr_cholesky_lpdf(L_15 | 1);
target += std_normal_lpdf(z_16[1]);
}
Submit script:
#!/bin/bash
# Slurm batch script: fit the cmdstanr/OpenCL model on a GPU node.
#SBATCH --nodes=1
# 4 tasks to match the 4 parallel MCMC chains launched by cmdstanr.
#SBATCH --ntasks-per-node=4
#SBATCH --partition=gpu
#SBATCH --mem=16G
# NOTE(review): "--gres=gpu" requests the gpu resource without an explicit
# count; many clusters expect "--gres=gpu:1" — confirm with the site docs.
#SBATCH --gres=gpu
#SBATCH --constraint=gpu_v100
#SBATCH --time=1:00:00
#SBATCH --job-name=R_mtc_base
# %J expands to the job id, keeping logs from different runs separate.
#SBATCH --error=R_mtc_gpu.%J.err
#SBATCH --output=R_mtc_gpu.%J.out
# Site modules: R for cmdstanr; crane is presumably the site environment.
module load crane
module load R/4.1
# NOTE(review): Stan's GPU backend uses OpenCL, not CUDA directly; the cuda
# module is presumably loaded for its OpenCL runtime libraries — verify
# libOpenCL is found at run time.
module load cuda/11.4
Rscript mtc_equity_2.R