Below is a GLMM with intercepts and beta parameters that differ by group (no pooling), plus a sigma parameter in the call to normal() that also differs by group. Fighting with the syntax for the X * B vector multiplication when the betas differ by group led me to just put everything in loops, but the model is hopelessly slow: it takes around an hour to train on only 5000 rows of data. I compound this by also making predictions for a test set in the generated quantities block, which again is done with a loop. That block is the one I would really like to vectorize, but I'm unsure how to achieve this given that I'm making predictions with normal_rng(), that mu_test is built from terms that differ by group, and that sigma differs by group as well. (I've put a rough sketch of the sort of vectorization I was imagining at the end of the post.)
Any help folks can offer would be greatly appreciated. I'm familiar with vectorizing simple models where the intercept and betas are shared by all observations, but not with mixed models.
data {
  int<lower=0> N;                 // number of samples
  int<lower=0> K;                 // number of predictors
  int<lower=1> J;                 // number of groups
  vector[N] y;                    // response variable
  matrix[N, K] X;                 // predictor matrix
  array[N] int x_group;           // group id for each sample
  vector[N] y_test;               // response variable, test set (also N rows)
  matrix[N, K] X_test;            // predictor matrix, test set
  array[N] int x_group_test;      // group id for each sample, test set
}
parameters {
  vector[J] alpha;                // group-level intercepts
  array[J] vector[K] beta;        // predictor coefficients, one vector per group
  vector<lower=0>[J] sigma;       // group-level residual sd
}
transformed parameters {
  vector[N] mu;
  for (i in 1:N) {
    mu[i] = alpha[x_group[i]] + X[i] * beta[x_group[i]];
  }
}
model {
  // priors
  for (j in 1:J) {
    sigma[j] ~ exponential(1);
  }
  // likelihood
  for (i in 1:N) {
    y[i] ~ normal(mu[i], sigma[x_group[i]]);
  }
}
generated quantities {
  vector[N] log_lik;
  vector[N] y_hat;
  vector[N] resid;
  vector[N] mu_test;
  vector[N] y_pred;
  vector[N] squared_error;
  real MSE;
  vector[N] abs_per_error;
  vector[2] MAPE_denom;
  real MAPE;
  MAPE_denom[1] = 0.00001;        // floor the denominator to account for 0s in y_test
  for (i in 1:N) {
    log_lik[i] = normal_lpdf(y[i] | mu[i], sigma[x_group[i]]);
    y_hat[i] = normal_rng(mu[i], sigma[x_group[i]]);
    resid[i] = y[i] - y_hat[i];
    // make prediction for the test set
    mu_test[i] = alpha[x_group_test[i]] + X_test[i] * beta[x_group_test[i]];
    y_pred[i] = normal_rng(mu_test[i], sigma[x_group_test[i]]);
    // calc squared error
    squared_error[i] = square(y_test[i] - y_pred[i]);
    // calc absolute percentage error
    MAPE_denom[2] = abs(y_test[i]);
    abs_per_error[i] = abs(y_test[i] - y_pred[i]) / max(MAPE_denom);
  }
  MSE = mean(squared_error);
  MAPE = mean(abs_per_error);
}
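
For reference, here is the direction I was imagining after digging through the functions reference: store beta as a matrix[J, K] instead of an array of vectors, use multi-indexing (alpha[x_group], sigma[x_group], beta[x_group]) to expand the group-level parameters out to N rows, and use rows_dot_product() for the row-wise X * B product. I haven't been able to confirm that any of this is right (or even compiles), so please treat it as a sketch of what I'm after rather than working code:

parameters {
  vector[J] alpha;                 // group-level intercepts
  matrix[J, K] beta;               // one row of K coefficients per group
  vector<lower=0>[J] sigma;        // group-level residual sd
}
transformed parameters {
  // beta[x_group] stacks row x_group[i] of beta for each i, giving an
  // N x K matrix; rows_dot_product() then computes X[i] * beta[x_group[i]]
  // for every row at once
  vector[N] mu = alpha[x_group] + rows_dot_product(X, beta[x_group]);
}
model {
  sigma ~ exponential(1);
  y ~ normal(mu, sigma[x_group]);  // sigma[x_group] is a length-N vector
}
generated quantities {
  vector[N] log_lik;
  vector[N] y_hat = to_vector(normal_rng(mu, sigma[x_group]));
  vector[N] resid = y - y_hat;
  vector[N] mu_test = alpha[x_group_test]
                      + rows_dot_product(X_test, beta[x_group_test]);
  vector[N] y_pred = to_vector(normal_rng(mu_test, sigma[x_group_test]));
  vector[N] squared_error = square(y_test - y_pred);
  real MSE = mean(squared_error);
  // floor the denominator at 0.00001 to guard against y_test values of 0
  vector[N] abs_per_error = abs(y_test - y_pred)
                            ./ fmax(rep_vector(0.00001, N), abs(y_test));
  real MAPE = mean(abs_per_error);
  // normal_lpdf returns the sum when vectorized, so as far as I can tell
  // the pointwise log-likelihood still needs a loop
  for (i in 1:N) {
    log_lik[i] = normal_lpdf(y[i] | mu[i], sigma[x_group[i]]);
  }
}

Two things I'm particularly unsure about: whether abs() and fmax() are actually vectorized like this in current Stan, and whether there's any way to avoid the remaining log_lik loop.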