In brms, I have recently implemented a feature to model different thresholds for different groups of observations. However, the model is not only terribly slow, it often doesn’t run at all, soaks up multiple GB of RAM (effectively all my RAM even for small data) or crashes the R session right away. It is likely a stupid mistake I made, but I don’t see it. Also, I would think that the memory problems should not occur even if I made a coding mistake. Anyway, I would be grateful for feedback and ideas.
The simplified Stan code looks as follows:
functions {
/* cumulative-logit log-PDF for a single response and merged thresholds
* Args:
* y: response category
* mu: latent mean parameter
* thres: vector of merged ordinal thresholds
* j: start and end index for the applid threshold within 'thres'
* Returns:
* a scalar to be added to the log posterior
*/
real cumulative_logit_merged_lpmf(int y, real mu, vector thres, int[] j) {
return ordered_logistic_lpmf(y | mu, thres[j[1]:j[2]]);
}
}
data {
int<lower=1> N; // number of observations
int Y[N]; // response variable
int<lower=1> ngrthres; // number of threshold groups
int<lower=1> nthres[ngrthres]; // number of thresholds
int<lower=1> Jthres[N, 2]; // threshold indices
}
transformed data {
int<lower=1> nmthres = prod(nthres); // total number of thresholds
int<lower=1> Kthres_start[ngrthres]; // start index per threshold group
int<lower=1> Kthres_end[ngrthres]; // end index per threshold group
Kthres_start[1] = 1;
Kthres_end[1] = nthres[1];
for (i in 2:ngrthres) {
Kthres_start[i] = Kthres_end[i-1] + 1;
Kthres_end[i] = Kthres_end[i-1] + nthres[i];
}
}
parameters {
ordered[nthres[1]] Intercept_1; // temporary thresholds for centered predictors
ordered[nthres[2]] Intercept_2; // temporary thresholds for centered predictors
ordered[nthres[3]] Intercept_3; // temporary thresholds for centered predictors
ordered[nthres[4]] Intercept_4; // temporary thresholds for centered predictors
ordered[nthres[5]] Intercept_5; // temporary thresholds for centered predictors
ordered[nthres[6]] Intercept_6; // temporary thresholds for centered predictors
ordered[nthres[7]] Intercept_7; // temporary thresholds for centered predictors
ordered[nthres[8]] Intercept_8; // temporary thresholds for centered predictors
ordered[nthres[9]] Intercept_9; // temporary thresholds for centered predictors
ordered[nthres[10]] Intercept_10; // temporary thresholds for centered predictors
ordered[nthres[11]] Intercept_11; // temporary thresholds for centered predictors
ordered[nthres[12]] Intercept_12; // temporary thresholds for centered predictors
ordered[nthres[13]] Intercept_13; // temporary thresholds for centered predictors
ordered[nthres[14]] Intercept_14; // temporary thresholds for centered predictors
ordered[nthres[15]] Intercept_15; // temporary thresholds for centered predictors
ordered[nthres[16]] Intercept_16; // temporary thresholds for centered predictors
ordered[nthres[17]] Intercept_17; // temporary thresholds for centered predictors
ordered[nthres[18]] Intercept_18; // temporary thresholds for centered predictors
ordered[nthres[19]] Intercept_19; // temporary thresholds for centered predictors
ordered[nthres[20]] Intercept_20; // temporary thresholds for centered predictors
}
transformed parameters {
}
model {
vector[nmthres] merged_Intercept; // merged thresholds
// initialize linear predictor term
vector[N] mu = rep_vector(0, N);
merged_Intercept[Kthres_start[1]:Kthres_end[1]] = Intercept_1;
merged_Intercept[Kthres_start[2]:Kthres_end[2]] = Intercept_2;
merged_Intercept[Kthres_start[3]:Kthres_end[3]] = Intercept_3;
merged_Intercept[Kthres_start[4]:Kthres_end[4]] = Intercept_4;
merged_Intercept[Kthres_start[5]:Kthres_end[5]] = Intercept_5;
merged_Intercept[Kthres_start[6]:Kthres_end[6]] = Intercept_6;
merged_Intercept[Kthres_start[7]:Kthres_end[7]] = Intercept_7;
merged_Intercept[Kthres_start[8]:Kthres_end[8]] = Intercept_8;
merged_Intercept[Kthres_start[9]:Kthres_end[9]] = Intercept_9;
merged_Intercept[Kthres_start[10]:Kthres_end[10]] = Intercept_10;
merged_Intercept[Kthres_start[11]:Kthres_end[11]] = Intercept_11;
merged_Intercept[Kthres_start[12]:Kthres_end[12]] = Intercept_12;
merged_Intercept[Kthres_start[13]:Kthres_end[13]] = Intercept_13;
merged_Intercept[Kthres_start[14]:Kthres_end[14]] = Intercept_14;
merged_Intercept[Kthres_start[15]:Kthres_end[15]] = Intercept_15;
merged_Intercept[Kthres_start[16]:Kthres_end[16]] = Intercept_16;
merged_Intercept[Kthres_start[17]:Kthres_end[17]] = Intercept_17;
merged_Intercept[Kthres_start[18]:Kthres_end[18]] = Intercept_18;
merged_Intercept[Kthres_start[19]:Kthres_end[19]] = Intercept_19;
merged_Intercept[Kthres_start[20]:Kthres_end[20]] = Intercept_20;
// priors including all constants
target += normal_lpdf(Intercept_1 | 0, 1);
target += normal_lpdf(Intercept_2 | 0, 1);
target += normal_lpdf(Intercept_3 | 0, 1);
target += normal_lpdf(Intercept_4 | 0, 1);
target += normal_lpdf(Intercept_5 | 0, 1);
target += normal_lpdf(Intercept_6 | 0, 1);
target += normal_lpdf(Intercept_7 | 0, 1);
target += normal_lpdf(Intercept_8 | 0, 1);
target += normal_lpdf(Intercept_9 | 0, 1);
target += normal_lpdf(Intercept_10 | 0, 1);
target += normal_lpdf(Intercept_11 | 0, 1);
target += normal_lpdf(Intercept_12 | 0, 1);
target += normal_lpdf(Intercept_13 | 0, 1);
target += normal_lpdf(Intercept_14 | 0, 1);
target += normal_lpdf(Intercept_15 | 0, 1);
target += normal_lpdf(Intercept_16 | 0, 1);
target += normal_lpdf(Intercept_17 | 0, 1);
target += normal_lpdf(Intercept_18 | 0, 1);
target += normal_lpdf(Intercept_19 | 0, 1);
target += normal_lpdf(Intercept_20 | 0, 1);
// likelihood including all constants
for (n in 1:N) {
target += cumulative_logit_merged_lpmf(Y[n] | mu[n], merged_Intercept, Jthres[n]);
}
}
generated quantities {
// compute actual thresholds
vector[nthres[1]] b_Intercept_1 = Intercept_1;
vector[nthres[2]] b_Intercept_2 = Intercept_2;
vector[nthres[3]] b_Intercept_3 = Intercept_3;
vector[nthres[4]] b_Intercept_4 = Intercept_4;
vector[nthres[5]] b_Intercept_5 = Intercept_5;
vector[nthres[6]] b_Intercept_6 = Intercept_6;
vector[nthres[7]] b_Intercept_7 = Intercept_7;
vector[nthres[8]] b_Intercept_8 = Intercept_8;
vector[nthres[9]] b_Intercept_9 = Intercept_9;
vector[nthres[10]] b_Intercept_10 = Intercept_10;
vector[nthres[11]] b_Intercept_11 = Intercept_11;
vector[nthres[12]] b_Intercept_12 = Intercept_12;
vector[nthres[13]] b_Intercept_13 = Intercept_13;
vector[nthres[14]] b_Intercept_14 = Intercept_14;
vector[nthres[15]] b_Intercept_15 = Intercept_15;
vector[nthres[16]] b_Intercept_16 = Intercept_16;
vector[nthres[17]] b_Intercept_17 = Intercept_17;
vector[nthres[18]] b_Intercept_18 = Intercept_18;
vector[nthres[19]] b_Intercept_19 = Intercept_19;
vector[nthres[20]] b_Intercept_20 = Intercept_20;
}
The data can be generated and model run as follows:
library(tidyverse)
library(rstan)
library(brms)
set.seed(12345)
nyears <- 20
norm <- rep(rnorm(100, mean = 0, sd = 2), nyears)
year <- rep(1:nyears, each = 100)
trend <- rep(cos(unique(year)/3), each = 100)
trust <- norm + trend
plot(year, trust)
plot(year, trend)
data <- cbind(norm, year, trend, trust) %>%
data.frame() %>%
mutate(
trust_ord = cut(trust, c(-20,-2,-1,0,1,2,20), ordered_result = TRUE),
year_f = factor(year)
)
sdata <- make_standata(
trust_ord | thres(5, year_f) ~ 1,
data = data,
family = cumulative(),
prior = prior(normal(0, 1), class = "Intercept"),
chains = 1
)
scode <- "
functions {
/* cumulative-logit log-PDF for a single response and merged thresholds
* Args:
* y: response category
* mu: latent mean parameter
* thres: vector of merged ordinal thresholds
* j: start and end index for the applid threshold within 'thres'
* Returns:
* a scalar to be added to the log posterior
*/
real cumulative_logit_merged_lpmf(int y, real mu, vector thres, int[] j) {
return ordered_logistic_lpmf(y | mu, thres[j[1]:j[2]]);
}
}
data {
int<lower=1> N; // number of observations
int Y[N]; // response variable
int<lower=1> ngrthres; // number of threshold groups
int<lower=1> nthres[ngrthres]; // number of thresholds
int<lower=1> Jthres[N, 2]; // threshold indices
}
transformed data {
int<lower=1> nmthres = prod(nthres); // total number of thresholds
int<lower=1> Kthres_start[ngrthres]; // start index per threshold group
int<lower=1> Kthres_end[ngrthres]; // end index per threshold group
Kthres_start[1] = 1;
Kthres_end[1] = nthres[1];
for (i in 2:ngrthres) {
Kthres_start[i] = Kthres_end[i-1] + 1;
Kthres_end[i] = Kthres_end[i-1] + nthres[i];
}
}
parameters {
ordered[nthres[1]] Intercept_1; // temporary thresholds for centered predictors
ordered[nthres[2]] Intercept_2; // temporary thresholds for centered predictors
ordered[nthres[3]] Intercept_3; // temporary thresholds for centered predictors
ordered[nthres[4]] Intercept_4; // temporary thresholds for centered predictors
ordered[nthres[5]] Intercept_5; // temporary thresholds for centered predictors
ordered[nthres[6]] Intercept_6; // temporary thresholds for centered predictors
ordered[nthres[7]] Intercept_7; // temporary thresholds for centered predictors
ordered[nthres[8]] Intercept_8; // temporary thresholds for centered predictors
ordered[nthres[9]] Intercept_9; // temporary thresholds for centered predictors
ordered[nthres[10]] Intercept_10; // temporary thresholds for centered predictors
ordered[nthres[11]] Intercept_11; // temporary thresholds for centered predictors
ordered[nthres[12]] Intercept_12; // temporary thresholds for centered predictors
ordered[nthres[13]] Intercept_13; // temporary thresholds for centered predictors
ordered[nthres[14]] Intercept_14; // temporary thresholds for centered predictors
ordered[nthres[15]] Intercept_15; // temporary thresholds for centered predictors
ordered[nthres[16]] Intercept_16; // temporary thresholds for centered predictors
ordered[nthres[17]] Intercept_17; // temporary thresholds for centered predictors
ordered[nthres[18]] Intercept_18; // temporary thresholds for centered predictors
ordered[nthres[19]] Intercept_19; // temporary thresholds for centered predictors
ordered[nthres[20]] Intercept_20; // temporary thresholds for centered predictors
}
transformed parameters {
}
model {
vector[nmthres] merged_Intercept; // merged thresholds
// initialize linear predictor term
vector[N] mu = rep_vector(0, N);
merged_Intercept[Kthres_start[1]:Kthres_end[1]] = Intercept_1;
merged_Intercept[Kthres_start[2]:Kthres_end[2]] = Intercept_2;
merged_Intercept[Kthres_start[3]:Kthres_end[3]] = Intercept_3;
merged_Intercept[Kthres_start[4]:Kthres_end[4]] = Intercept_4;
merged_Intercept[Kthres_start[5]:Kthres_end[5]] = Intercept_5;
merged_Intercept[Kthres_start[6]:Kthres_end[6]] = Intercept_6;
merged_Intercept[Kthres_start[7]:Kthres_end[7]] = Intercept_7;
merged_Intercept[Kthres_start[8]:Kthres_end[8]] = Intercept_8;
merged_Intercept[Kthres_start[9]:Kthres_end[9]] = Intercept_9;
merged_Intercept[Kthres_start[10]:Kthres_end[10]] = Intercept_10;
merged_Intercept[Kthres_start[11]:Kthres_end[11]] = Intercept_11;
merged_Intercept[Kthres_start[12]:Kthres_end[12]] = Intercept_12;
merged_Intercept[Kthres_start[13]:Kthres_end[13]] = Intercept_13;
merged_Intercept[Kthres_start[14]:Kthres_end[14]] = Intercept_14;
merged_Intercept[Kthres_start[15]:Kthres_end[15]] = Intercept_15;
merged_Intercept[Kthres_start[16]:Kthres_end[16]] = Intercept_16;
merged_Intercept[Kthres_start[17]:Kthres_end[17]] = Intercept_17;
merged_Intercept[Kthres_start[18]:Kthres_end[18]] = Intercept_18;
merged_Intercept[Kthres_start[19]:Kthres_end[19]] = Intercept_19;
merged_Intercept[Kthres_start[20]:Kthres_end[20]] = Intercept_20;
// priors including all constants
target += normal_lpdf(Intercept_1 | 0, 1);
target += normal_lpdf(Intercept_2 | 0, 1);
target += normal_lpdf(Intercept_3 | 0, 1);
target += normal_lpdf(Intercept_4 | 0, 1);
target += normal_lpdf(Intercept_5 | 0, 1);
target += normal_lpdf(Intercept_6 | 0, 1);
target += normal_lpdf(Intercept_7 | 0, 1);
target += normal_lpdf(Intercept_8 | 0, 1);
target += normal_lpdf(Intercept_9 | 0, 1);
target += normal_lpdf(Intercept_10 | 0, 1);
target += normal_lpdf(Intercept_11 | 0, 1);
target += normal_lpdf(Intercept_12 | 0, 1);
target += normal_lpdf(Intercept_13 | 0, 1);
target += normal_lpdf(Intercept_14 | 0, 1);
target += normal_lpdf(Intercept_15 | 0, 1);
target += normal_lpdf(Intercept_16 | 0, 1);
target += normal_lpdf(Intercept_17 | 0, 1);
target += normal_lpdf(Intercept_18 | 0, 1);
target += normal_lpdf(Intercept_19 | 0, 1);
target += normal_lpdf(Intercept_20 | 0, 1);
// likelihood including all constants
for (n in 1:N) {
target += cumulative_logit_merged_lpmf(Y[n] | mu[n], merged_Intercept, Jthres[n]);
}
}
generated quantities {
// compute actual thresholds
vector[nthres[1]] b_Intercept_1 = Intercept_1;
vector[nthres[2]] b_Intercept_2 = Intercept_2;
vector[nthres[3]] b_Intercept_3 = Intercept_3;
vector[nthres[4]] b_Intercept_4 = Intercept_4;
vector[nthres[5]] b_Intercept_5 = Intercept_5;
vector[nthres[6]] b_Intercept_6 = Intercept_6;
vector[nthres[7]] b_Intercept_7 = Intercept_7;
vector[nthres[8]] b_Intercept_8 = Intercept_8;
vector[nthres[9]] b_Intercept_9 = Intercept_9;
vector[nthres[10]] b_Intercept_10 = Intercept_10;
vector[nthres[11]] b_Intercept_11 = Intercept_11;
vector[nthres[12]] b_Intercept_12 = Intercept_12;
vector[nthres[13]] b_Intercept_13 = Intercept_13;
vector[nthres[14]] b_Intercept_14 = Intercept_14;
vector[nthres[15]] b_Intercept_15 = Intercept_15;
vector[nthres[16]] b_Intercept_16 = Intercept_16;
vector[nthres[17]] b_Intercept_17 = Intercept_17;
vector[nthres[18]] b_Intercept_18 = Intercept_18;
vector[nthres[19]] b_Intercept_19 = Intercept_19;
vector[nthres[20]] b_Intercept_20 = Intercept_20;
}
"
fit <- stan(model_code = scode, data = sdata)