# Missing Data: Mixed Discrete-Continuous Gaussian Copula

Hi All!

Basic Question
How can I model missing data associated with a multivariate vector of outcomes in a mixed discrete-continuous Gaussian copula?

Specifics
First, I must thank both @spinkney and @andrjohns who helped here and @martinmodrak who helped here.

I have a multi-dimensional response vector of mixed discrete (binary and polychotomous) and continuous outcomes. I wish to learn the joint distribution of this series of traits and their dependence structure, particularly as it relates to a single predictor variable (x is a single real value per observation representing age in years). Based on reading and previous posts here I turned towards a copula, which provides a way to decouple the joint into marginals and examine the dependency structure of the large (~60 dimensions) multivariate space. Based on @spinkney 's helpful work here, @andrjohns brms implementation here, and @martinmodrak 's advice here (that draws from @bgoodri 's multi-probit implementation here), I have fit a mixed ordinal and continuous gaussian copula. All diagnostics look good and the only adjustment made in sampling is max_treedepth to 13 and running 4 chains, 6000 iterations (1000 warmup). I share some of the pairs here (C1 and C2 may cause troubles later?, but they are highly correlated given their relationship in the mean functionâ€¦)

My main question is related to missing data. Right now Iâ€™ve fit the model to 5 traits (2 continuous, 1 binary, 2 polychotomous) with no missing cases - this is only n=195. I know to learn correlations we need LOTS of data and the total N is 1305, but the matrix is sparse in places with missing data across each data type. I have read through Stanâ€™s chapters on missing data, but I am a little lost how to apply in the case of a copula. Essentially, I want to account for missing data across each trait in the model. For now Iâ€™ve got 5 traits, eventually I will have ~60 traits.

Let me know if there are any questions about the model! Thank you for any advice :)

functions{
matrix chol2inv2(matrix L) {
int K = rows(L);
matrix[K, K] K_identity = diag_matrix(rep_vector(1.0, K));
matrix[K, K] L_inv = mdivide_left_tri_low(L, K_identity);
matrix[K, K] rtn;
if (K == 0) {
return L;
}
if (K == 1) {
rtn[1, 1] = inv_square(L[1, 1]);
} else {
for (k in 1:K) {
rtn[k, k] = dot_self(tail(L_inv[ : , k], K + 1 - k));
for (j in (k + 1):K) {
int Kmj = K - j;
rtn[k, j] = dot_product(tail(L_inv[ : , k], Kmj + 1),
tail(L_inv[ : , j], Kmj + 1));
rtn[j, k] = rtn[k, j];
}
}
}
return rtn;
}
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, J] Gammainv = chol2inv2(L);
return -N * sum(log(diagonal(L))) - 0.5 * sum(add_diag(Gammainv, -1.0) .* crossprod(U));
}
real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;
// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);
U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];
pos += curr_cols;
}
// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}
matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);
for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
}
return rtn;
}
matrix[] ordinal_marginal(array [,] int y, matrix mu_glm, real[,] u, array [] int n_thr, array [] vector tau, matrix L){
int N = rows(mu_glm);
int K = cols(mu_glm);
matrix[N,K] rtn[2];
for(n in 1:N){
vector[K] z[N];
real prev = 0;
for(k in 1:K){
if(n_thr[k] == 1){ //binary
real bound;
bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
if(y[n,k] == 1){
real t;
t = bound + (1 - bound)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(bound);
} else{
real t;
t = bound*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(bound);
}
} else{ // polychotomous
real t;
if(y[n,k] == 1){
real ub = Phi((tau[1+k-2,1] - (mu_glm[n,k] + prev)) / L[k,k]);
t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub);
} else if (y[n,k] == n_thr[k] + 1){
real lb = Phi((tau[1+k-2,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(lb);
} else{
real lb = Phi((tau[1+k-2,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau[1+k-2,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub-lb);
}
}
if(k < K){
}
}
}
return rtn;
}
}
data{
int<lower=1> N; // total sample size
int<lower=1> J; // # of continuous variables (normal marginal)
int<lower=1> K; // # of ordinal variables (ordinal marginal)
int<lower=1> Thr[K]; # of thresholds for ordinal variables
matrix[N,J] Y_C; // continuous responses
int Y_O[N,K]; // ordinal responses
real X[N]; // predictor (age)
}
transformed data{
int order[K+J]; // order of responses for copula -> ordinal (binary) first
for(i in 1:K+J){
order[i] = i;
}
}
parameters{
vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
vector<lower=0>[K] o1; // first parameter of ordinal mean function
ordered[Thr[3]]tau[2]; //cutpoints for ordinal model (polychtomous only)
real<lower=0, upper=1> U[N,K]; //nuisance that absorbs inequality constraints
cholesky_factor_corr[K+J] L; // cholesky factor of correlation matrix
}
model{
// Priors
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
o1 ~ normal(0,5);
L ~ lkj_corr_cholesky(4); //
for(i in 1:2){
for(t in 1:Thr[3]){
tau[i,t] ~ normal(t+0.5,1); // cutpoint prior
}
}
// Mean and SD Functions for Likelihood
matrix[N,J] mu_J; // continuous
matrix[N,J] sd_J; // continuous
matrix[N,K] mu_K; // ordinal
for(i in 1:N){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
}
for(k in 1:K){
mu_K[i,k] = X[i]^o1[k];
}
}
// Likelihood
target += centered_gaussian_copula_cholesky_({ordinal_marginal(Y_O,mu_K,U,Thr,tau,L), normal_marginal(Y_C,mu_J,sd_J)},L,order);
}
generated quantities{
corr_matrix[K+J] Omega = multiply_lower_tri_self_transpose(L);
}

2 Likes

Itâ€™s actually â€śeasyâ€ť - depending on your level of comfort with Stan.

Put all your observed Y outcomes in a matrix, called Y_obs, and supply the indexes of the values for the full Y matrix for observed and missing. Create a matrix or vector of the size of your missing values in the parameters block, say Y_miss. In transformed parameters construct the full Y matrix using Y_obs and Y_miss. Use Y as you typically would.

1 Like

Thank you!

I presume I can do this for each outcome type and then just use the Y as normal within each marginal for the copula?

Do you have any examples (or links to such outside of the manual)? I think I just want to see the basics of the process you describe above. My only concern is the missing is not uniform across outcomes, so some outcomes will have different #'s of observed vs missing.

The varying number of missing is not a problem because you supply the index of missing for each marginal.

The complication is actually in the discrete outcomes. The easiest thing to do is to get out all missing discrete Y on the latent scale and then transform in generated quantities using the same logic in the lpdf but in the inverse.

Iâ€™m not at a computer for another day or so so donâ€™t have any links to give atm.

1 Like

Thank you again!

I will give it a shot over the next day or so and may shoot another reply your wayâ€¦

Still trying to wrap my head around a few things. The model currently supplies what your initial post describes as y_obs. Trying to figure out what the object you describe as â€śindex values for the full matrixâ€ť looks like and the matrix/vector of y_miss looks like in turn, and then how to construct the full matrix (I should go back into the manualâ€¦). THEN I need to also figure out the discrete parametersâ€¦

1 Like

I think you may get rid of that chol2inv. I havenâ€™t tested but see if this works.

It may be slightly slower since it constructs a J x N matrix instead of J x J but you may want to test it out.

   real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, N] G = mdivide_left_tri_low(L, U') ;
return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
}


@spinkney I can confirm that removal of chol2inv works and achieves identical results to the previous - takes about 4.5 hours to fit 5 variables.

In relation to the missing data, I am trying to fit the continuous missing first (in a bivariate case for now). Iâ€™m having issues most likely stemming from slicing and indexing ragged arrays. I include the code here for now. I have 2 response variables with different numbers of observed and missing. I can read each variable in separately and then put together at the end into the y matrix. Is there an efficient way to do this in one go - Iâ€™ll eventually have 60 variables (continuous and ordinal) and was trying to avoid including 60 iterations of y_obs and y_mis.

functions{
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, N] G = mdivide_left_tri_low(L, U') ;
return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
}

real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;

// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);

U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

pos += curr_cols;
}

// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}

matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);

for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
}

return rtn;
}
}
data{
int<lower=1> J; // # of continuous variables (normal marginal)
int<lower=0> N_obs[J]; // total number of observed
int<lower=0> N_mis[J]; // number missing
int<lower=1, upper=N_obs+N_mis> ii_obs[N_obs,J]; // indices of observed
int<lower=1, upper=N_obs+N_mis> ii_mis[N_mis,J]; // indices of missing
matrix[N_obs,J] Y_C; // non-missing continuous responses
real X[N_obs+N_mis]; // predictor (age)
}
transformed data{
int<lower=0> N = N_obs + N_mis;//
int order[J]; // order of responses for copula -> ordinal (binary) first
for(i in 1:J){
order[i] = i;
}
}
parameters{
vector[J] y_mis[N_mis]; // missing data parameter - UNSURE

vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
cholesky_factor_corr[J] L; // cholesky factor of correlation matrix
}
transformed parameters{
matrix[N,J] y; // the data, observed + missing
y[ii_obs,J] = Y_C;
y[ii_mis,J] = y_mis;
}
model{
// Priors
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
L ~ lkj_corr_cholesky(4); //

// Mean and SD Functions for Likelihood
matrix[N,J] mu_J; // continuous
matrix[N,J] sd_J; // continuous
for(i in 1:N){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
}
}

// Likelihood
target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);

}
generated quantities{
corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}



Does that code work above or are you having issues?

Also how long did the model with chol2inv take?

1. The model with chol2inv was ~ 30 minutes faster.
2. The code above would not compile - I would receive errors about array construction. I tried a slightly different route. I post the stan code and r code associated below. There are runtime errors here associated with treedepth, bulk ess, and bfmi. I think these â€śmayâ€ť be solved if I adjust the priors and sampling/warm-up time based on the fact this log probability is based on 2 continuous variable and not the 5 I fit previously. I had to do a lot of adjusting previously to fit the initial model I described with continuous and ordinal combined.

However, maybe there is a more efficient way to write this model? I now need to also include the ordinal data.

dat_new <- dat %>% filter(location == "US") %>% select(FDL_L, HDL_L) %>% na_if(-1) %>% droplevels() # 2 continuous response vars
age <- dat_new\$agey # X var

# Extract the missing values into a VECTOR
dat_complete <- dat_new[!is.na(dat_new)]

# Extract the missing and present values as MATRICES
ind_pres <- which(!is.na(dat_new), arr.ind = TRUE)
ind_miss <- which(is.na(dat_new), arr.ind = TRUE)
mod.data <- list(J = 2,
Nrow = nrow(dat_new),
Ncol = ncol(dat_new),
Ncomp = length(dat_complete),
Nmiss = sum(is.na(dat_new)),
dat_complete = dat_complete,
ind_pres = ind_pres,
ind_miss = ind_miss,
X = age)

test_miss <- stan("cont_missing.stan", data = mod.data,control = list(max_treedepth = 12),init = "0")

functions{
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, N] G = mdivide_left_tri_low(L, U') ;
return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
}

real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;

// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);

U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

pos += curr_cols;
}

// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}

matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);

for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
}

return rtn;
}
}
data{
int J; // # of outcomes
int<lower=0> Nrow;
int<lower=0> Ncol;
int<lower=0> Ncomp; // Number of non-missing values
int<lower=0> Nmiss; // Number of missing values
real dat_complete[Ncomp];   // Vector of non-missing values
int ind_pres[Ncomp, 2];     // Matrix (row, col) of non-missing value indices
int ind_miss[Nmiss, 2];// Matrix (row, col) of missing value indices
real X[Nrow]; // predictor (age)
}
transformed data{
int order[J]; // order of responses for copula -> ordinal (binary) first
for(i in 1:J){
order[i] = i;
}
}
parameters{
real ymiss[Nmiss]; // missing data parameter - UNSURE

vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
cholesky_factor_corr[J] L; // cholesky factor of correlation matrix
}
transformed parameters{
matrix[Nrow,Ncol] y;   // The "data" with interpolated missing values  y[ii_obs,J] = Y_C;
// Fill y with non-missing values
for(n in 1:Ncomp) {
y[ind_pres[n,1]][ind_pres[n,2]] = dat_complete[n];
}
// Fill the rest of y with missing value "parameters"
for(n in 1:Nmiss){
y[ind_miss[n,1]][ind_miss[n,2]] = ymiss[n];
}
}
model{
// Priors
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
L ~ lkj_corr_cholesky(4); //

// Mean and SD Functions for Likelihood
matrix[Nrow,J] mu_J; // continuous
matrix[Nrow,J] sd_J; // continuous
for(i in 1:Nrow){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
}
}

// Likelihood
target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);

}
generated quantities{
corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}



@spinkney After adjusting the prior on the cholesky factor, putting a vague prior on the missing values, and running 1000 warmup and 6000 iterations I have fit a bivariate copula (continuous margins only) with missing and observed data.

One last step/complication - incorporating the missing ordinal dataâ€¦

functions{
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, N] G = mdivide_left_tri_low(L, U') ;
return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
}

real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;

// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);

U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

pos += curr_cols;
}

// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}

matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);

for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
}

return rtn;
}
}
data{
int J; // # of outcomes
int<lower=0> Nrow;
int<lower=0> Ncol;
int<lower=0> Ncomp; // Number of non-missing values
int<lower=0> Nmiss; // Number of missing values
real dat_complete[Ncomp];   // Vector of non-missing values
int ind_pres[Ncomp, 2];     // Matrix (row, col) of non-missing value indices
int ind_miss[Nmiss, 2];// Matrix (row, col) of missing value indices
real X[Nrow]; // predictor (age)
}
transformed data{
int order[J]; // order of responses for copula -> ordinal (binary) first
for(i in 1:J){
order[i] = i;
}
}
parameters{
real ymiss[Nmiss]; // missing data parameter - UNSURE

vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
cholesky_factor_corr[J] L; // cholesky factor of correlation matrix
}
transformed parameters{
matrix[Nrow,Ncol] y;   // The "data" with interpolated missing values
// Fill y with non-missing values
for(n in 1:Ncomp) {
y[ind_pres[n,1]][ind_pres[n,2]] = dat_complete[n];
}
// Fill the rest of y with missing value "parameters"
for(n in 1:Nmiss){
y[ind_miss[n,1]][ind_miss[n,2]] = ymiss[n];
}
}
model{
// Priors
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
L ~ lkj_corr_cholesky(1); //
ymiss ~ normal(0,100);

// Mean and SD Functions for Likelihood
matrix[Nrow,J] mu_J; // continuous
matrix[Nrow,J] sd_J; // continuous
for(i in 1:Nrow){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
}
}

// Likelihood
target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);

}
generated quantities{
corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}


1 Like

Why not put the prior on ymiss of normal(mu_j, sd_j)?

You know - I do not have an answer for why I forgot about that - I will adjust.

Do you have any suggestions on the discrete outcomes (either binary or polychotomous)?

Hello all!

Just want to bump this for some additional advice on the missing discrete component of this model.

Iâ€™d like to marginalize over all missing values so I can use all available data as opposed to complete cases only. Iâ€™ve gone through the manualâ€™s chapters on both missing data and latent discrete parameters (i.e., change-point models), but am struggling how to translate to my use case.

My first stab was to use the knowledge from @bgoodri 's long ago musings on the tMVN (
tMVN.pdf (2.4 MB) that eventually led to example code for the multivariate probit. We know z = \phi^{-1}(u) where u is a standard uniform variate. Because the missing data is unbounded, I use the code from the pdf and set z[n,k] = inv_Phi(u[n,k]). Further, I set the jacobian transformation to 0 (ln(1)). Note, Iâ€™ve coded all missing data as 99. Iâ€™ve run this model and it fits, but I am very positive this is quite wrong and not what I want to do. I have found an example of missing data in a multivariate probit model at here (from @NikVetr github), but not sure how this may or may not relate. I know I need to sum over the possibilities of the missing data, just trying to figure out the most efficient means possible.

functions{
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, N] G = mdivide_left_tri_low(L, U') ;
return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
}

real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;

// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);

U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

pos += curr_cols;
}

// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}

matrix[] ordinal_marginal(array [,] int y, matrix mu_glm, real[,] u, array [] int n_thr, array [] vector tau, matrix L){
int N = rows(mu_glm);
int K = cols(mu_glm);
matrix[N,K] rtn[2];

for(n in 1:N){
vector[K] z[N];
real prev = 0;
for(k in 1:K){
if(n_thr[k] == 1){ //binary
if(y[n,k] == 99){
z[n,k] = inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){
real bound;
bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
real t;
t = bound + (1 - bound)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(bound);
} else{
real bound;
bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
real t;
t = bound*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(bound);
}
} else{ // polychotomous
if(y[n,k] == 99){
z[n,k] = inv_Phi(u[n,k]);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = 0;
} else if(y[n,k] == 1){
real t;
real ub = Phi((tau[1+k-2,1] - (mu_glm[n,k] + prev)) / L[k,k]);
t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub);
} else if (y[n,k] == n_thr[k] + 1){
real t;
real lb = Phi((tau[1+k-2,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(lb);
} else{
real t;
real lb = Phi((tau[1+k-2,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau[1+k-2,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub-lb);
}
}
if(k < K){
}
}
}

return rtn;
}

}
data{
int<lower=1> N; // total sample size
int<lower=1> K; // # of ordinal variables (ordinal marginal)
int<lower=1> Thr[K]; # of thresholds for ordinal variables
int Y[N,K]; // ordinal responses
real X[N]; // predictor (age)
}
transformed data{
int order[K]; // order of responses for copula -> ordinal (binary) first
for(i in 1:K){
order[i] = i;
}
}
parameters{
vector<lower=0>[K] o1; // first parameter of ordinal mean function
ordered[Thr[3]]tau[2]; //cutpoints for ordinal model (polychtomous only)
real<lower=0, upper=1> U[N,K]; //nuisance that absorbs inequality constraints
cholesky_factor_corr[K] L; // cholesky factor of correlation matrix
}
model{
matrix[N,K] mu;
for(n in 1:N){
for(k in 1:K){
mu[n,k] = X[n]^o1[k];
}
}

// Priors
o1 ~ normal(0,5);
L ~ lkj_corr_cholesky(4); //
for(i in 1:2){
for(t in 1:Thr[3]){
tau[i,t] ~ normal(t+0.5,1); // cutpoint prior
}
}

// Likelihood
target += centered_gaussian_copula_cholesky_({ordinal_marginal(Y,mu,U,Thr,tau,L)},L,order);

}
generated quantities{
corr_matrix[K] Omega = multiply_lower_tri_self_transpose(L);
}