Hi,
currently my code is written as a “plugin” for brms (which is a bit annoying to do, but gives me some additional flexibility). So to have a concrete example, I generate some simple data as follows:
library(dplyr)  # for %>%, mutate(), n()
library(tidyr)  # for crossing()

rcumulative_logit <- function(N, mean, thresholds) {
  raw <- rlogis(N) + mean
  res <- rep(1, N)
  for(t in thresholds) {
    res <- res + (t < raw)
  }
  res
}

set.seed(58224522)
N_patients <- 7
simple_test <- data.frame(patient = factor(1:N_patients)) %>%
  crossing(time_from_diagnosis = c(0, 3, 5)) %>%
  mutate(q1 = rcumulative_logit(n(), rnorm(N_patients)[as.integer(patient)] + time_from_diagnosis * 0.5, thresholds = c(-1, -0.3, 0, 1)),
         q2 = rcumulative_logit(n(), rnorm(N_patients, sd = 0.5)[as.integer(patient)] + time_from_diagnosis * 0.1, thresholds = c(-0.4, -0.1, 0.5, 1.5)),
         q3 = rcumulative_logit(n(), rnorm(N_patients, sd = 2)[as.integer(patient)] + time_from_diagnosis * 0.3, thresholds = c(-2, -0.8, -0.2, 1.1)),
         obs_id = 1:n())
simple_test
# A tibble: 21 x 6
   patient time_from_diagnosis    q1    q2    q3 obs_id
   <fct>                 <dbl> <dbl> <dbl> <dbl>  <int>
 1 1                         0     1     1     5      1
 2 1                         3     5     5     4      2
 3 1                         5     5     5     5      3
 4 2                         0     4     1     1      4
 5 2                         3     4     3     4      5
 6 2                         5     5     1     4      6
 7 3                         0     1     5     1      7
 8 3                         3     1     4     3      8
 9 3                         5     4     4     2      9
10 4                         0     4     2     4     10
I then run the hacked-around brms model with the formula bf(mvbind(q1, q2, q3) ~ 1 + time_from_diagnosis), giving the following data passed to Stan and the full generated Stan code:
$N_q1
[1] 21
$Y_q1
[1] 1 5 5 4 4 5 1 1 4 4 5 5 4 5 5 5 5 5 5 5 5
$nthres_q1
[1] 4
$N_q2
[1] 21
$Y_q2
[1] 1 5 5 1 3 1 5 4 4 2 5 1 1 5 5 1 4 5 4 1 2
$nthres_q2
[1] 4
$N_q3
[1] 21
$Y_q3
[1] 5 4 5 1 4 4 1 3 2 4 5 4 5 5 4 5 5 5 1 1 2
$nthres_q3
[1] 4
$N
[1] 21
$K_q1
[1] 1
$X_q1
time_from_diagnosis
1 0
2 3
3 5
4 0
5 3
6 5
7 0
8 3
9 5
10 0
11 3
12 5
13 0
14 3
15 5
16 0
17 3
18 5
19 0
20 3
21 5
$K_q2
[1] 1
$X_q2
time_from_diagnosis
1 0
2 3
3 5
4 0
5 3
6 5
7 0
8 3
9 5
10 0
11 3
12 5
13 0
14 3
15 5
16 0
17 3
18 5
19 0
20 3
21 5
$K_q3
[1] 1
$X_q3
time_from_diagnosis
1 0
2 3
3 5
4 0
5 3
6 5
7 0
8 3
9 5
10 0
11 3
12 5
13 0
14 3
15 5
16 0
17 3
18 5
19 0
20 3
21 5
$prior_only
[1] 0
// generated with brms 2.15.7
functions {
  real empty_cumulative_lpmf(int y, real mu, vector intercept) {
    return 0;
  }
  real approx_Phi(real x) {
    return inv_logit(x * 1.702);
  }
  real approx_inv_Phi(real x) {
    return logit(x) / 1.702;
  }
}
data {
  int<lower=1> N;  // total number of observations
  int<lower=1> N_q1;  // number of observations
  int Y_q1[N_q1];  // response variable
  int<lower=2> nthres_q1;  // number of thresholds
  int<lower=1> K_q1;  // number of population-level effects
  matrix[N_q1, K_q1] X_q1;  // population-level design matrix
  int<lower=1> N_q2;  // number of observations
  int Y_q2[N_q2];  // response variable
  int<lower=2> nthres_q2;  // number of thresholds
  int<lower=1> K_q2;  // number of population-level effects
  matrix[N_q2, K_q2] X_q2;  // population-level design matrix
  int<lower=1> N_q3;  // number of observations
  int Y_q3[N_q3];  // response variable
  int<lower=2> nthres_q3;  // number of thresholds
  int<lower=1> K_q3;  // number of population-level effects
  matrix[N_q3, K_q3] X_q3;  // population-level design matrix
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  int Kc_q1 = K_q1;
  matrix[N_q1, Kc_q1] Xc_q1;  // centered version of X_q1
  vector[Kc_q1] means_X_q1;  // column means of X_q1 before centering
  int Kc_q2 = K_q2;
  matrix[N_q2, Kc_q2] Xc_q2;  // centered version of X_q2
  vector[Kc_q2] means_X_q2;  // column means of X_q2 before centering
  int Kc_q3 = K_q3;
  matrix[N_q3, Kc_q3] Xc_q3;  // centered version of X_q3
  vector[Kc_q3] means_X_q3;  // column means of X_q3 before centering
  int N_thres = nthres_q1;
  if(N != N_q1) { reject("Requiring equal sample size in all dimensions."); }
  if(N != N_q2) { reject("Requiring equal sample size in all dimensions."); }
  if(N != N_q3) { reject("Requiring equal sample size in all dimensions."); }
  if(nthres_q1 != nthres_q2) { reject("Requiring equal number of thresholds in all dimensions."); }
  if(nthres_q1 != nthres_q3) { reject("Requiring equal number of thresholds in all dimensions."); }
  for (i in 1:K_q1) {
    means_X_q1[i] = mean(X_q1[, i]);
    Xc_q1[, i] = X_q1[, i] - means_X_q1[i];
  }
  for (i in 1:K_q2) {
    means_X_q2[i] = mean(X_q2[, i]);
    Xc_q2[, i] = X_q2[, i] - means_X_q2[i];
  }
  for (i in 1:K_q3) {
    means_X_q3[i] = mean(X_q3[, i]);
    Xc_q3[, i] = X_q3[, i] - means_X_q3[i];
  }
}
parameters {
  vector[Kc_q1] b_q1;  // population-level effects
  ordered[nthres_q1] Intercept_q1;  // temporary thresholds for centered predictors
  vector[Kc_q2] b_q2;  // population-level effects
  ordered[nthres_q2] Intercept_q2;  // temporary thresholds for centered predictors
  vector[Kc_q3] b_q3;  // population-level effects
  ordered[nthres_q3] Intercept_q3;  // temporary thresholds for centered predictors
  cholesky_factor_corr[3] L_rescor;
  real<lower=0, upper=1> u[N, 3];  // raw residuals
}
transformed parameters {
}
model {
  vector[N_thres] thresholds[3];
  thresholds[1] = Intercept_q1;
  thresholds[2] = Intercept_q2;
  thresholds[3] = Intercept_q3;
  target += lkj_corr_cholesky_lpdf(L_rescor | 1);
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N_q1] mu_q1 = Xc_q1 * b_q1;
    // initialize linear predictor term
    vector[N_q2] mu_q2 = Xc_q2 * b_q2;
    // initialize linear predictor term
    vector[N_q3] mu_q3 = Xc_q3 * b_q3;
    for (n in 1:N_q1) {
      target += empty_cumulative_lpmf(Y_q1[n] | mu_q1[n], Intercept_q1);
    }
    for (n in 1:N_q2) {
      target += empty_cumulative_lpmf(Y_q2[n] | mu_q2[n], Intercept_q2);
    }
    for (n in 1:N_q3) {
      target += empty_cumulative_lpmf(Y_q3[n] | mu_q3[n], Intercept_q3);
    }
    for(n in 1:N) {
      real mus[3] = {mu_q1[n], mu_q2[n], mu_q3[n]};
      int Ys[3] = {Y_q1[n], Y_q2[n], Y_q3[n]};
      vector[3] z;
      real prev;
      prev = 0;
      for (d in 1:3) {
        real t;  // threshold at which utility = 0
        if (Ys[d] == 1) {
          real ub = approx_Phi((thresholds[d, 1] - (mus[d] + prev)) / L_rescor[d, d]);
          t = ub * u[n, d];
          target += log(ub);  // Jacobian adjustment
        } else if (Ys[d] == N_thres + 1) {
          real lb = approx_Phi((thresholds[d, N_thres] - (mus[d] + prev)) / L_rescor[d, d]);
          t = lb + (1 - lb) * u[n, d];
          target += log1m(lb);  // Jacobian adjustment
        } else {
          real lb = approx_Phi((thresholds[d, Ys[d] - 1] - (mus[d] + prev)) / L_rescor[d, d]);
          real ub = approx_Phi((thresholds[d, Ys[d]] - (mus[d] + prev)) / L_rescor[d, d]);
          t = lb + (ub - lb) * u[n, d];
          target += log(ub - lb);
        }
        z[d] = approx_inv_Phi(t);
        if (d < 3) prev = L_rescor[d + 1, 1:d] * head(z, d);
        // Jacobian adjustments imply z is truncated standard normal
        // thus utility --- mu + L_rescor * z --- is truncated multivariate normal
      }
    }
  }
  // priors including constants
  target += student_t_lpdf(Intercept_q1 | 3, 0, 2.5);
  target += student_t_lpdf(Intercept_q2 | 3, 0, 2.5);
  target += student_t_lpdf(Intercept_q3 | 3, 0, 2.5);
}
generated quantities {
  // compute actual thresholds
  vector[nthres_q1] b_q1_Intercept = Intercept_q1 + dot_product(means_X_q1, b_q1);
  // compute actual thresholds
  vector[nthres_q2] b_q2_Intercept = Intercept_q2 + dot_product(means_X_q2, b_q2);
  // compute actual thresholds
  vector[nthres_q3] b_q3_Intercept = Intercept_q3 + dot_product(means_X_q3, b_q3);
  corr_matrix[3] Rescor = multiply_lower_tri_self_transpose(L_rescor);
  vector<lower=-1,upper=1>[3] rescor;
  // extract upper diagonal of rescor matrix
  for (k in 1:3) {
    for (j in 1:(k - 1)) {
      rescor[choose(k - 1, 2) + j] = Rescor[j, k];
    }
  }
}
Hope that makes it a bit clearer. In particular, you can do anything you want to compute the mus, the ordinal variables start at 1, and you can definitely put any prior you like on the thresholds. The empty_cumulative_lpmf is just a hack to force brms to ignore its default likelihood and do what I want. Also, instead of using an N_dims variable, 3 is hardcoded (because the code is completely generated).
I also realized that the best way to combine with continuous variables might be to go the other way around. If the continuous variables were just multivariate normal, you would:
- Take the marginal means and correlation matrix for the continuous variables and compute their likelihood
- Compute the conditional means and correlation matrix for the probit variables given the observed continuous values (e.g. as in Multivariate normal distribution - Wikipedia); a rough sketch of this step follows after the list. I don't doubt the computations can be rewritten to take advantage of the fact that you have a Cholesky decomposition; I just don't have enough skill in linear algebra to see it quickly.
- Use the conditional means and correlation in the GHK algorithm.
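To make the second step a bit more concrete, here is a rough, untested sketch of the standard conditional-MVN formulas in Stan. All names (mu_c, mu_p, the Sigma blocks, y_cont, Kc, Kp) are placeholders for the partitioned mean vector, covariance blocks and observed continuous values, not variables from the model above:

// hypothetical sketch: condition the probit block (subscript p) on the
// observed continuous block (subscript c) of a joint MVN
vector[Kp] mu_cond = mu_p + Sigma_pc * mdivide_left_spd(Sigma_cc, y_cont - mu_c);
matrix[Kp, Kp] Sigma_cond = Sigma_pp - Sigma_pc * mdivide_left_spd(Sigma_cc, Sigma_cp);
// mu_cond and Sigma_cond (turned back into a correlation matrix / Cholesky factor)
// would then feed into the GHK loop above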
Generalizing to non-normal continuous distributions should then be possible by transforming with the appropriate CDF and then the normal inverse CDF. (This is something I am less sure about, so please double check.)
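For example (again just a sketch, with a made-up gamma marginal and placeholder parameters alpha and beta), an observed value x could be mapped to a standard-normal score like this:

// hypothetical: probability integral transform of an observed continuous value
// with a gamma marginal; x, alpha and beta are placeholders
real x_norm = inv_Phi(gamma_cdf(x, alpha, beta));

If the marginal parameters are estimated as well, I believe the marginal log density of x would also need to be added to the target, as in a Gaussian copula, but please double check that too.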
It is quite possible that you can do this all in one go by just modifying the GHK algorithm; once again, I am not good enough at linear algebra to see this quickly.
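My (possibly wrong) understanding of that one-go version: an observed continuous dimension pins down its z exactly, so inside the per-dimension loop above one would do something roughly like the following instead of drawing u (y_cont is a placeholder, and the continuous residual is assumed to be already scaled to unit variance):

// hypothetical: handle a continuous dimension d inside the same sequential loop
real z_d = (y_cont[n, d] - (mus[d] + prev)) / L_rescor[d, d];
target += std_normal_lpdf(z_d) - log(L_rescor[d, d]);  // conditional density of the observed value
z[d] = z_d;
// the ordinal dimensions would keep using the truncation-and-u construction above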
It looks like a tough model, so I hope you can succeed in building it.