Hi all,
I have a bit of an esoteric question so bear with me…
Let’s say I fit a multivariate cumulative probit model with @bgoodri’s code (see below). y is an ordinal outcome on a 1-12 scale and x is a continuous predictor (age). Everything looks great here; no issues with the fit itself.
Now, I have an out-of-sample dataset: \hat{y} is the out-of-sample ordinal scores and \hat{x} is the out-of-sample age. I want to ask one simple question: given the model, how likely is this new sample? I think this is p(\hat{y} | \hat{x}, \theta). Ultimately, I’d like to end up with a profile with age on the x-axis and likelihood on the y-axis, showing how the likelihood changes with age. I am trying to gauge how rare this new data point is relative to the much larger training dataset.
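Writing this out with the model’s names below (so \theta = (\beta, \text{thr}, \Omega)), I believe the quantity I’m after is a multivariate normal rectangle probability:

p(\hat{y}_n \mid \hat{x}_n, \theta) = \Pr\big( \text{thr}_{d,\hat{y}_{nd}-1} < \beta_d \hat{x}_n + \varepsilon_{nd} \le \text{thr}_{d,\hat{y}_{nd}} \text{ for all } d = 1,\dots,D \big), \qquad \varepsilon_n \sim \mathcal{N}(0,\Omega),

with the conventions \text{thr}_{d,0} = -\infty and \text{thr}_{d,\text{cats}} = +\infty. Please correct me if that’s not right.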
Is there a way to do this in the generated quantities block? Or can I do it post hoc, after I’ve already fit the model in cmdstanr? I again ‘think’ this is just dmvnorm in R (with augmented variables?). I’ve put a rough sketch of what I’m imagining after the model below.
I hope this makes sense!
data {
  int<lower=1> D;
  int<lower=0> N;
  array[N, D] int<lower=1> y; // ordinal responses in 1:cats; 99 codes a missing value
  array[N] real X;            // continuous predictor (age)
  int<lower=2> cats;          // number of ordinal categories
}
transformed data {
  int n_thr = cats - 1; // number of thresholds per dimension
}
parameters {
  cholesky_factor_corr[D] L;
  array[N, D] real<lower=0, upper=1> u; // nuisance that absorbs inequality constraints
  array[D] ordered[n_thr] thr;          // cutpoints for each dimension
  array[D] real<lower=0> beta;          // age slopes
}
model {
  // priors
  beta ~ normal(0, 5);
  L ~ lkj_corr_cholesky(4);
  for (d in 1:D) {
    for (i in 1:n_thr) {
      thr[d, i] ~ normal(i + 1, 1);
    }
  }
  // implicit: u is iid standard uniform a priori

  { // likelihood
    for (n in 1:N) {
      vector[D] mu;
      vector[D] z;
      real prev = 0;
      for (d in 1:D) {
        // Phi and inv_Phi may overflow and / or be numerically inaccurate
        real t;
        mu[d] = beta[d] * X[n];
        if (y[n, d] == 99) { // missing response: unconstrained utility
          z[d] = inv_Phi(u[n, d]);
          // contributes nothing to the likelihood (log(1) = 0)
        } else if (y[n, d] == 1) {
          real ub = Phi((thr[d, 1] - (mu[d] + prev)) / L[d, d]);
          t = ub * u[n, d];
          z[d] = inv_Phi(t); // utility is below the first threshold
          target += log(ub); // Jacobian adjustment
        } else if (y[n, d] == n_thr + 1) {
          real lb = Phi((thr[d, n_thr] - (mu[d] + prev)) / L[d, d]);
          t = lb + (1 - lb) * u[n, d];
          z[d] = inv_Phi(t); // utility is above the top threshold
          target += log1m(lb); // Jacobian adjustment
        } else {
          real lb = Phi((thr[d, y[n, d] - 1] - (mu[d] + prev)) / L[d, d]);
          real ub = Phi((thr[d, y[n, d]] - (mu[d] + prev)) / L[d, d]);
          t = lb + (ub - lb) * u[n, d];
          z[d] = inv_Phi(t); // utility is between the two adjacent thresholds
          target += log(ub - lb); // Jacobian adjustment
        }
        if (d < D) {
          prev = L[d + 1, 1:d] * head(z, d);
        }
        // Jacobian adjustments imply z is truncated standard normal
        // thus utility --- mu + L * z --- is truncated multivariate normal
      }
    }
  }
}
generated quantities {
  corr_matrix[D] Omega = multiply_lower_tri_self_transpose(L);
}
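For what it’s worth, here is a rough, untested sketch of the sort of thing I’m imagining in generated quantities. It assumes I also pass the out-of-sample data in the data block (say int<lower=0> N_new;, array[N_new, D] int y_new;, array[N_new] real X_new; — those names are mine, not from the model above), and it reuses the same sequential-conditioning trick as the model block, but with uniform_rng() in place of the nuisance u (i.e. a one-replication GHK pass):

generated quantities {
  corr_matrix[D] Omega = multiply_lower_tri_self_transpose(L);

  // log p(y_new[n] | X_new[n], theta) for this posterior draw (one-replication GHK estimate)
  vector[N_new] log_lik_new;

  for (n in 1:N_new) {
    vector[D] z;
    real prev = 0;
    log_lik_new[n] = 0;
    for (d in 1:D) {
      real mu_nd = beta[d] * X_new[n];
      // probability-scale bounds of the latent-utility interval implied by y_new[n, d]
      real lb = (y_new[n, d] == 1)
                ? 0.0
                : Phi((thr[d, y_new[n, d] - 1] - (mu_nd + prev)) / L[d, d]);
      real ub = (y_new[n, d] == n_thr + 1)
                ? 1.0
                : Phi((thr[d, y_new[n, d]] - (mu_nd + prev)) / L[d, d]);
      log_lik_new[n] += log(ub - lb);
      // draw z[d] from its truncated conditional so later dimensions condition on it
      z[d] = inv_Phi(lb + (ub - lb) * uniform_rng(0, 1));
      if (d < D) {
        prev = L[d + 1, 1:d] * head(z, d);
      }
    }
  }
}

(I have ignored the 99 missing-value code here to keep it short.) My understanding is that a single GHK replication is noisy for any one draw, but averaging exp(log_lik_new[n]) over all the posterior draws should still target the predictive probability, and evaluating it over a grid of ages would give the age profile I described. And if I understand cmdstanr correctly, I could run this post hoc as a standalone generated quantities program with $generate_quantities() on the saved fit rather than refitting; does that sound right?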