Hello - thank you!
The indices relate to my comment above about “hacking” around the different variable types I have. It’s quite likely there is a more efficient way to code this up. In the above code, K=3, where k=1 is binary and k=2-3 are polytomous. tau is a 2D array of vectors for the two polytomous variables. I’ve ordered the input of my response variable so the binary one comes first, so k=1 is skipped in that step due to the if…else statement. For k=2 the index is 1+2-2, i.e. tau[1,1]; for k=3 it is tau[2,1], the second vector of the 2D array and the first threshold. Does this make sense?
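Schematically, the offset just remaps k onto the polytomous-only array. A toy sketch of that arithmetic (names assumed, not runnable on its own):
// k = 1 is binary and never reaches this branch
for (k in 2:3) {
int s = 1 + k - 2; // k = 2 -> s = 1, k = 3 -> s = 2
// the first cutpoint of response k is then tau[s, 1]
}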
In your gold standard model, you use s to index which tau vector to use. When I tried this approach using k directly, I’d get errors about a size mismatch, so I’ve hacked the index so that it always starts at 1 (the first threshold vector).
Broadly, I have done this because I have a high-dimensional response vector with different numbers of thresholds along it: binary variables, 5-level, 7-level, 12-level, etc. Maybe it’s a totally inefficient way to do it? Essentially I am just trying to capture which tau vector to use; a sketch of one alternative is below, followed by my code for the full model (continuous and ordinal).
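A minimal sketch of a ragged-array pattern I could imagine replacing the offsets with: pack every response’s cutpoints into one long vector plus a start-index array, so a single lookup replaces the per-block arithmetic. The names tau_flat and tau_start are hypothetical, not from my model:
functions {
// Hypothetical helper: response k's cutpoints occupy
// tau_flat[tau_start[k] : (tau_start[k] + n_thr[k] - 1)]
vector lookup_tau(vector tau_flat, array[] int tau_start, array[] int n_thr, int k) {
return segment(tau_flat, tau_start[k], n_thr[k]);
}
}
Every branch in probit_marginal could then use lookup_tau(tau_flat, tau_start, n_thr, k) in place of tau_teeth[1+k-8, ...] and friends. The catch is the ordered constraint: tau_flat cannot be declared ordered as a single vector, so each segment would need to be constructed from ordered pieces (e.g. positive increments).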
functions{
// Gaussian Copula Functions coded by Sean Pinkney
// https://spinkney.github.io/helpful_stan_functions/group__gaussian__copula.html
// Method derived from Smith and Khaled (2011) DOI: 10.2139/ssrn.1937983.
// U is a matrix of random variates for each marginal returned from the function
// centered_gaussian_copula_cholesky and L is the Cholesky factor of the corr mat
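// Subtracting sum(U .^ 2) below removes the standard normal log kernels of the margins, leaving the copula density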
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U); // total number of observations
int J = cols(U); // total number of responses (J+K)
matrix[J, N] G = mdivide_left_tri_low(L, U'); // equivalent to inverse(L) * U'
return -N * sum(log(diagonal(L))) - 0.5 * ( sum( G .^ 2 ) - sum( U .^ 2 ) ); // log probability
}
// marginals is a nested array of marginal calculations from each marginal type
// L is the cholesky factor of the correlation matrix and order is the variable order
// In combination with the above function, returns the log probability
real centered_gaussian_copula_cholesky_(array[,] matrix marginals, matrix L, array[] int order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]); // total number of observations
int J = rows(L); // number of responses (J+K)
matrix[N, J] U; // empty matrix for marginal calculations
// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
// adjustments (from discrete marginals)
real adj = 0;
int pos = 1;
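// pos tracks the first column of the next marginal block in U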
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);
U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];
adj += sum(marginals[m][2]); // log-likelihoods and jacobian adjustments
pos += curr_cols;
}
// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}
// Returns 2D array of matrices containing the centered random variables rtn[1] and
// jacobian adjustments rtn[2]. Assumes each standardised marginal is std_normal()
array[] matrix continuous_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);
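// The column-j log-density below is stored in row 1 only; the caller sums the whole matrix, so placement is arbitrary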
for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ : , j]; // standardised random variate
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ : , j]); // marginal log-likelihood
}
return rtn;
}
// Adapted from Ben Goodrich's Stan Implementation of the GHK Algorithm
// https://github.com/stan-dev/example-models/blob/master/misc/multivariate-probit/probit-multi-good.stan#L11
// We know the latent data model (Albert and Chib) can be rewritten using the cholesky
// factor: y = mu + L*z, where L is the cholesky factor and z ~ N(0,1). One can simulate
// draws from a truncated MVN using draws from a univariate normal. Given the constraints
// (upper and lower bounds), we introduce a utility u that is standard uniform (0,1).
// In instances where data are missing, there is no constraint on y and u is modelled
// like any other unknown parameter. Treating missing data as additional parameters in
// the model can be thought of as predictions from the marginal model and is the
// recommendation put forth in BDA3, as well as by several Stan devs, given the
// analytical issues with integrating out the missing data
// Returns 2D array of matrices of random latent variates (z~N(0,1)) and jacobian adjustments
// u is the utility parameter assumed standard uniform, n_thr gives the number of
// cutpoints for each response, tau_* are the cutpoint parameters, L is the cholesky factor
array[] matrix probit_marginal(array[,] int y, matrix mu_glm, array[,] real u, array[] int n_thr, array[] vector tau_teeth, array[] vector tau_seven_stage, array[] vector tau_pelvis, vector tau_carpals, vector tau_tarsals, matrix L){
int N = rows(mu_glm);
int K = cols(mu_glm);
matrix[N,K] rtn[2];
for(n in 1:N){
vector[K] z[N];
real prev = 0;
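// Assumed column layout behind the index offsets below (inferred from the offsets and array sizes):
// k = 1..7 binary (n_thr == 1)
// k = 8..23 dentition (n_thr == 12) -> tau_teeth[1 + k - 8]
// k = 24..39 7-stage (n_thr == 6) -> tau_seven_stage[1 + k - 24]
// k = 40..41 carpals and tarsals (single cutpoint vectors, no offset needed)
// k = 42..43 pelvis (else branch) -> tau_pelvis[1 + k - 42]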
for(k in 1:K){
if(n_thr[k] == 1){ //binary
if(y[n,k] == 99){ // 99 is missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0; // no adjustment because no transformation
} else if(y[n,k] == 1){
real bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
real t = bound + (1 - bound)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(bound); // adjustment
} else{
real bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
real t = bound*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(bound); // adjustment
}
} else if(n_thr[k] == 12){ // dentition
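// Same GHK truncation pattern as the binary branch above; tau_teeth supplies the bounds here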
if(y[n,k] == 99){ // missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){ // bound
real ub = Phi((tau_teeth[1+k-8,1] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub); // adjustment
} else if (y[n,k] == n_thr[k] + 1){ // bound
real lb = Phi((tau_teeth[1+k-8,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(lb); // adjustment
} else{ // between bounds
real lb = Phi((tau_teeth[1+k-8,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau_teeth[1+k-8,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub-lb); // adjustment
}
} else if(n_thr[k] == 6){ // 7-stage variables
if(y[n,k] == 99){ // missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){ // bound
real ub = Phi((tau_seven_stage[1+k-24,1] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub); // adjustment
} else if (y[n,k] == n_thr[k] + 1){ // bound
real lb = Phi((tau_seven_stage[1+k-24,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(lb); // adjustment
} else{ // between bounds
real lb = Phi((tau_seven_stage[1+k-24,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau_seven_stage[1+k-24,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub-lb); // adjustment
}
} else if(n_thr[k] == 8){ // carpals
if(y[n,k] == 99){ // missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){ // bound
real ub = Phi((tau_carpals[1] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub); // adjustment
} else if (y[n,k] == n_thr[k] + 1){ // bound
real lb = Phi((tau_carpals[n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(lb); // adjustment
} else{ // between bounds
real lb = Phi((tau_carpals[y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau_carpals[y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub-lb); // adjustment
}
} else if(n_thr[k] == 7){ // tarsals
if(y[n,k] == 99){ // missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){ // bound
real ub = Phi((tau_tarsals[1] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub); // adjustment
} else if (y[n,k] == n_thr[k] + 1){ // bound
real lb = Phi((tau_tarsals[n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(lb); // adjustment
} else{ // between bounds
real lb = Phi((tau_tarsals[y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau_tarsals[y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub-lb); // adjustment
}
} else{ // pelvis
if(y[n,k] == 99){ // missing
z[n,k] = L[k,k]*inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k]; // here z is unconstrained
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){ // bound
real ub = Phi((tau_pelvis[1+k-42,1] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub); // adjustment
} else if (y[n,k] == n_thr[k] + 1){ // bound
real lb = Phi((tau_pelvis[1+k-42,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log1m(lb); // adjustment
} else{ // between bounds
real lb = Phi((tau_pelvis[1+k-42,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau_pelvis[1+k-42,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
real t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k]; // latent variable
rtn[2][n,k] = log(ub-lb); // adjustment
}
}
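// GHK recursion: prev is the conditional mean contribution of the first k latent draws to margin k+1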
if(k < K){
prev = L[k+1, 1:k]*head(z[n],k);
}
// Jacobian adjustments imply z is truncated standard normal
// thus utility --- mu + L_Omega * z --- is truncated multivariate normal
}
}
return rtn;
}
}
data{
// The ordinal and continuous data are included separately. This is to facilitate
// ease of each individual marginal function and to handle missing data in both
// cases
int<lower=1> N; // total sample size
int<lower=1> J; // total number of continuous responses
int<lower=1> K; // total number of ordinal responses
real<lower=0> X[N]; // chronological age (independent variable)
//ordinal
int y_ord[N,K]; // 2D array of integers n obs x k ordinal responses
int thresholds[K]; // vector of integer values signifying number of cutpoints per response
// continuous
int<lower=0> complete_J; // integer of non-missing values in continuous data
int<lower=0> miss_J; // integer of missing values in continuous data
real y_cont[complete_J]; // real vector of non-missing values
int ind_pres[complete_J, 2]; // (row, col) indices of non-missing values
int ind_miss[miss_J, 2]; // (row, col) indices of missing values
}
transformed data{
// The purpose of this variable is simply to keep the responses organized for the
// model. It is not necessary for model construction, but keeps things in order
int order[K+J]; // order of responses with ordinal first (binary, poly, continuous)
for(i in 1:K+J){
order[i] = i;
}
}
parameters{
// Like the data block, I separate the parameters here by data type for ease of
// interpretation
cholesky_factor_corr[K+J] L; // cholesky factor of the correlation matrix
// ordinal
vector<lower=0>[K] o1; // parameter of ordinal mean function per response
ordered[12] tau_teeth[16]; // cutpoints for dentition
ordered[6] tau_seven_stage[16]; // cutpoints for 7-stage responses (includes tarsals)
ordered[8] tau_carpals; // cutpoints for carpals
ordered[7] tau_tarsals; // cutpoints for tarsals
ordered[2] tau_pelvis[2]; // cutpoints for pelvic fusion (excluding IC_EF)
real<lower=0, upper=1> u[N,K]; // nuisance that absorbs inequality constraints in GHK and tMVN
// continuous
real ymiss_J[miss_J]; // array of missing data parameters in continuous data
vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
}
model{
// This is a rather involved model block where I construct the mean and SD of
// both data types and put together the matrix for continuous responses using the
// known data y_cont and marginally predicted parameter ymiss_J. This is what goes
// into the marginal function. Note, technically 'u' is the parameter also used
// for missing data in the ordinal model, but is left unconstrained.
// I make the calculations here instead of the transformed parameters block to save memory
matrix[N,J] mu_J; // continuous mean
matrix[N,J] sd_J; // continuous sd
matrix[N,J] y_cont_full; // The "data" with predicted missing values
matrix[N,K] mu_K; // ordinal mean
for(i in 1:N){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j]; // power-law mean in age, with offset
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]); // noise SD grows linearly with age
}
for(k in 1:K){
mu_K[i,k] = X[i]^o1[k]; // ordinal latent mean, power law in age
}
}
for(n in 1:complete_J) {
y_cont_full[ind_pres[n,1]][ind_pres[n,2]] = y_cont[n];
}
// Fill the rest of y_cont_full with missing value "parameters"
for(i in 1:miss_J){
y_cont_full[ind_miss[i,1]][ind_miss[i,2]] = ymiss_J[i];
}
// PRIORS //
L ~ lkj_corr_cholesky(4); // prior on cholesky factor of the correlation matrix
// Continuous parameters
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
// Ordinal parameters
o1 ~ normal(0,5);
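// The cutpoint priors below centre threshold t near t + 0.5, i.e. roughly unit-spaced on the latent scale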
for(i in 1:16){
for(t in 1:12){
tau_teeth[i,t] ~ normal(t+0.5,1); // dentition cutpoint prior
}
}
for(i in 1:16){
for(t in 1:6){
tau_seven_stage[i,t] ~ normal(t+0.5,1); // 7-stage cutpoint prior
}
}
for(i in 1:2){
for(t in 1:2){
tau_pelvis[i,t] ~ normal(t+0.5,1); // pelvis cutpoint prior
}
}
for(t in 1:8){
tau_carpals[t] ~ normal(t+0.5,1); // carpal cutpoint prior
}
for(t in 1:7){
tau_tarsals[t] ~ normal(t+0.5,1); // tarsal cutpoint prior
}
// LIKELIHOOD //
target += centered_gaussian_copula_cholesky_({probit_marginal(y_ord, mu_K, u, thresholds, tau_teeth, tau_seven_stage, tau_pelvis, tau_carpals, tau_tarsals,L), continuous_marginal(y_cont_full, mu_J, sd_J)}, L, order);
}
generated quantities{
// Generally, the parameter of interest from a Gaussian copula is the correlation structure Rho,
// which I reconstruct here to be used downstream
corr_matrix[K+J] cor_mat = multiply_lower_tri_self_transpose(L);
}