Missing Data: Mixed Discrete-Continuous Gaussian Copula

Hi All!

Basic Question
How can I model missing data associated with a multivariate vector of outcomes in a mixed discrete-continuous Gaussian copula?

Specifics
First, I must thank both @spinkney and @andrjohns who helped here and @martinmodrak who helped here.

I have a multi-dimensional response vector of mixed discrete (binary and polychotomous) and continuous outcomes. I wish to learn the joint distribution of this series of traits and their dependence structure, particularly as it relates to a single predictor variable (x is a single real value per observation representing age in years). Based on reading and previous posts here I turned towards a copula, which provides a way to decouple the joint into marginals and examine the dependency structure of the large (~60 dimensions) multivariate space. Based on @spinkney 's helpful work here, @andrjohns brms implementation here, and @martinmodrak 's advice here (that draws from @bgoodri 's multi-probit implementation here), I have fit a mixed ordinal and continuous gaussian copula. All diagnostics look good and the only adjustment made in sampling is max_treedepth to 13 and running 4 chains, 6000 iterations (1000 warmup). I share some of the pairs here (C1 and C2 may cause troubles later?, but they are highly correlated given their relationship in the mean function…)

My main question is related to missing data. Right now I’ve fit the model to 5 traits (2 continuous, 1 binary, 2 polychotomous) with no missing cases - this is only n=195. I know to learn correlations we need LOTS of data and the total N is 1305, but the matrix is sparse in places with missing data across each data type. I have read through Stan’s chapters on missing data, but I am a little lost how to apply in the case of a copula. Essentially, I want to account for missing data across each trait in the model. For now I’ve got 5 traits, eventually I will have ~60 traits.

Let me know if there are any questions about the model! Thank you for any advice :)

functions{
  matrix chol2inv2(matrix L) {
    int K = rows(L);
    matrix[K, K] K_identity = diag_matrix(rep_vector(1.0, K));
    matrix[K, K] L_inv = mdivide_left_tri_low(L, K_identity);
    matrix[K, K] rtn;
    if (K == 0) {
      return L;
    }
    if (K == 1) {
      rtn[1, 1] = inv_square(L[1, 1]);
    } else {
      for (k in 1:K) {
        rtn[k, k] = dot_self(tail(L_inv[ : , k], K + 1 - k));
        for (j in (k + 1):K) {
          int Kmj = K - j;
          rtn[k, j] = dot_product(tail(L_inv[ : , k], Kmj + 1),
                                  tail(L_inv[ : , j], Kmj + 1));
          rtn[j, k] = rtn[k, j];
        }
      }
    }
    return rtn;
  }
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, J] Gammainv = chol2inv2(L);
    return -N * sum(log(diagonal(L))) - 0.5 * sum(add_diag(Gammainv, -1.0) .* crossprod(U));
  }
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;
    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);
      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];
      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }
    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
    int N = rows(mu_glm);
    int J = cols(mu_glm);
    matrix[N, J] rtn[2];
    // Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
    rtn[2] = rep_matrix(0, N, J);
    for (j in 1 : J) {
      rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
      rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
    }
    return rtn;
}
  matrix[] ordinal_marginal(array [,] int y, matrix mu_glm, real[,] u, array [] int n_thr, array [] vector tau, matrix L){
    int N = rows(mu_glm);
    int K = cols(mu_glm);
    matrix[N,K] rtn[2];
    for(n in 1:N){
      vector[K] z[N];
      real prev = 0;
      for(k in 1:K){
        if(n_thr[k] == 1){ //binary
          real bound;
          bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
          if(y[n,k] == 1){
            real t;
            t = bound + (1 - bound)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log1m(bound);
          } else{
            real t;
            t = bound*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(bound);
          }
        } else{ // polychotomous
          real t;
          if(y[n,k] == 1){
            real ub = Phi((tau[1+k-2,1] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = ub*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(ub);
          } else if (y[n,k] == n_thr[k] + 1){
            real lb = Phi((tau[1+k-2,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = lb + (1 - lb)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log1m(lb);
          } else{
            real lb = Phi((tau[1+k-2,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
            real ub = Phi((tau[1+k-2,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = lb + (ub-lb)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(ub-lb);
          }
        }
        if(k < K){
          prev = L[k+1, 1:k]*head(z[n],k);
        }
      }
    }
    return rtn;
  }
}
data{
  int<lower=1> N; // total sample size
  int<lower=1> J; // # of continuous variables (normal marginal)
  int<lower=1> K; // # of ordinal variables (ordinal marginal)
  int<lower=1> Thr[K]; # of thresholds for ordinal variables
  matrix[N,J] Y_C; // continuous responses
  int Y_O[N,K]; // ordinal responses
  real X[N]; // predictor (age)
}
transformed data{
  int order[K+J]; // order of responses for copula -> ordinal (binary) first
  for(i in 1:K+J){
    order[i] = i;
  }
}
parameters{
  vector<lower=0>[J] c1; // first parameter of continuous mean function
  vector<lower=0>[J] c2; // second parameter of continuous mean function
  vector[J] c3; // third parameter of continuous mean function
  vector<lower=0>[J] k1; // first parameter of continuous noise function
  vector<lower=0>[J] k2; // second parameter of continuous noise function
  vector<lower=0>[K] o1; // first parameter of ordinal mean function
  ordered[Thr[3]]tau[2]; //cutpoints for ordinal model (polychtomous only)
  real<lower=0, upper=1> U[N,K]; //nuisance that absorbs inequality constraints
  cholesky_factor_corr[K+J] L; // cholesky factor of correlation matrix
}
model{
  // Priors
  c1 ~ normal(0,1);
  c2 ~ normal(0,1);
  c3 ~ normal(0,1);
  k1 ~ cauchy(0,1);
  k2 ~ normal(0,1);
  o1 ~ normal(0,5);
  L ~ lkj_corr_cholesky(4); //
  for(i in 1:2){
    for(t in 1:Thr[3]){
    tau[i,t] ~ normal(t+0.5,1); // cutpoint prior
    }
  }
  // Mean and SD Functions for Likelihood
  matrix[N,J] mu_J; // continuous
  matrix[N,J] sd_J; // continuous
  matrix[N,K] mu_K; // ordinal
  for(i in 1:N){
    for(j in 1:J){
      mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
      sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
    }
    for(k in 1:K){
      mu_K[i,k] = X[i]^o1[k];
    }
  }
  // Likelihood
  target += centered_gaussian_copula_cholesky_({ordinal_marginal(Y_O,mu_K,U,Thr,tau,L), normal_marginal(Y_C,mu_J,sd_J)},L,order);
}
generated quantities{
  corr_matrix[K+J] Omega = multiply_lower_tri_self_transpose(L);
}
2 Likes

It’s actually “easy” - depending on your level of comfort with Stan.

Put all your observed Y outcomes in a matrix, called Y_obs, and supply the indexes of the values for the full Y matrix for observed and missing. Create a matrix or vector of the size of your missing values in the parameters block, say Y_miss. In transformed parameters construct the full Y matrix using Y_obs and Y_miss. Use Y as you typically would.

1 Like

Thank you!

I presume I can do this for each outcome type and then just use the Y as normal within each marginal for the copula?

Do you have any examples (or links to such outside of the manual)? I think I just want to see the basics of the process you describe above. My only concern is the missing is not uniform across outcomes, so some outcomes will have different #'s of observed vs missing.

The varying number of missing is not a problem because you supply the index of missing for each marginal.

The complication is actually in the discrete outcomes. The easiest thing to do is to get out all missing discrete Y on the latent scale and then transform in generated quantities using the same logic in the lpdf but in the inverse.

I’m not at a computer for another day or so so don’t have any links to give atm.

1 Like

Thank you again!

I will give it a shot over the next day or so and may shoot another reply your way…

Still trying to wrap my head around a few things. The model currently supplies what your initial post describes as y_obs. Trying to figure out what the object you describe as “index values for the full matrix” looks like and the matrix/vector of y_miss looks like in turn, and then how to construct the full matrix (I should go back into the manual…). THEN I need to also figure out the discrete parameters…

1 Like

I think you may get rid of that chol2inv. I haven’t tested but see if this works.

It may be slightly slower since it constructs a J x N matrix instead of J x J but you may want to test it out.

   real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, N] G = mdivide_left_tri_low(L, U') ;
    return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
  }

@spinkney I can confirm that removal of chol2inv works and achieves identical results to the previous - takes about 4.5 hours to fit 5 variables.

In relation to the missing data, I am trying to fit the continuous missing first (in a bivariate case for now). I’m having issues most likely stemming from slicing and indexing ragged arrays. I include the code here for now. I have 2 response variables with different numbers of observed and missing. I can read each variable in separately and then put together at the end into the y matrix. Is there an efficient way to do this in one go - I’ll eventually have 60 variables (continuous and ordinal) and was trying to avoid including 60 iterations of y_obs and y_mis.

functions{
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, N] G = mdivide_left_tri_low(L, U') ;
    return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
  }
  
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;

    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);

      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }

    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  
  matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
    int N = rows(mu_glm);
    int J = cols(mu_glm);
    matrix[N, J] rtn[2];
    // Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
    rtn[2] = rep_matrix(0, N, J);

    for (j in 1 : J) {
      rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
      rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
    }

    return rtn;
}
}
data{
  int<lower=1> J; // # of continuous variables (normal marginal)
  int<lower=0> N_obs[J]; // total number of observed
  int<lower=0> N_mis[J]; // number missing
  int<lower=1, upper=N_obs+N_mis> ii_obs[N_obs,J]; // indices of observed
  int<lower=1, upper=N_obs+N_mis> ii_mis[N_mis,J]; // indices of missing
  matrix[N_obs,J] Y_C; // non-missing continuous responses
  real X[N_obs+N_mis]; // predictor (age)
}
transformed data{
  int<lower=0> N = N_obs + N_mis;//
  int order[J]; // order of responses for copula -> ordinal (binary) first
  for(i in 1:J){
    order[i] = i;
  }
}
parameters{
  vector[J] y_mis[N_mis]; // missing data parameter - UNSURE
  
  vector<lower=0>[J] c1; // first parameter of continuous mean function
  vector<lower=0>[J] c2; // second parameter of continuous mean function
  vector[J] c3; // third parameter of continuous mean function
  vector<lower=0>[J] k1; // first parameter of continuous noise function
  vector<lower=0>[J] k2; // second parameter of continuous noise function
  cholesky_factor_corr[J] L; // cholesky factor of correlation matrix 
}
transformed parameters{
  matrix[N,J] y; // the data, observed + missing
  y[ii_obs,J] = Y_C;
  y[ii_mis,J] = y_mis;
}
model{
  // Priors
  c1 ~ normal(0,1);
  c2 ~ normal(0,1);
  c3 ~ normal(0,1);
  k1 ~ cauchy(0,1);
  k2 ~ normal(0,1);
  L ~ lkj_corr_cholesky(4); // 

  // Mean and SD Functions for Likelihood
  matrix[N,J] mu_J; // continuous
  matrix[N,J] sd_J; // continuous
  for(i in 1:N){
    for(j in 1:J){
      mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
      sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
    }
  }
  
  // Likelihood
  target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);
  
}
generated quantities{
  corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}

Does that code work above or are you having issues?

Also how long did the model with chol2inv take?

  1. The model with chol2inv was ~ 30 minutes faster.
  2. The code above would not compile - I would receive errors about array construction. I tried a slightly different route. I post the stan code and r code associated below. There are runtime errors here associated with treedepth, bulk ess, and bfmi. I think these “may” be solved if I adjust the priors and sampling/warm-up time based on the fact this log probability is based on 2 continuous variable and not the 5 I fit previously. I had to do a lot of adjusting previously to fit the initial model I described with continuous and ordinal combined.

However, maybe there is a more efficient way to write this model? I now need to also include the ordinal data.

dat_new <- dat %>% filter(location == "US") %>% select(FDL_L, HDL_L) %>% na_if(-1) %>% droplevels() # 2 continuous response vars
age <- dat_new$agey # X var

# Extract the missing values into a VECTOR
dat_complete <- dat_new[!is.na(dat_new)]

# Extract the missing and present values as MATRICES
ind_pres <- which(!is.na(dat_new), arr.ind = TRUE)
ind_miss <- which(is.na(dat_new), arr.ind = TRUE)
mod.data <- list(J = 2,
                 Nrow = nrow(dat_new),
                 Ncol = ncol(dat_new),
                 Ncomp = length(dat_complete),
                 Nmiss = sum(is.na(dat_new)),
                 dat_complete = dat_complete,
                 ind_pres = ind_pres,
                 ind_miss = ind_miss, 
                 X = age)

test_miss <- stan("cont_missing.stan", data = mod.data,control = list(max_treedepth = 12),init = "0")
functions{
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, N] G = mdivide_left_tri_low(L, U') ;
    return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
  }
  
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;

    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);

      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }

    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  
  matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
    int N = rows(mu_glm);
    int J = cols(mu_glm);
    matrix[N, J] rtn[2];
    // Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
    rtn[2] = rep_matrix(0, N, J);

    for (j in 1 : J) {
      rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
      rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
    }

    return rtn;
}
}
data{
  int J; // # of outcomes
  int<lower=0> Nrow;
  int<lower=0> Ncol;
  int<lower=0> Ncomp; // Number of non-missing values
  int<lower=0> Nmiss; // Number of missing values
  real dat_complete[Ncomp];   // Vector of non-missing values
  int ind_pres[Ncomp, 2];     // Matrix (row, col) of non-missing value indices
  int ind_miss[Nmiss, 2];// Matrix (row, col) of missing value indices
  real X[Nrow]; // predictor (age)
  }
transformed data{
  int order[J]; // order of responses for copula -> ordinal (binary) first
  for(i in 1:J){
    order[i] = i;
  }
}
parameters{
  real ymiss[Nmiss]; // missing data parameter - UNSURE
  
  vector<lower=0>[J] c1; // first parameter of continuous mean function
  vector<lower=0>[J] c2; // second parameter of continuous mean function
  vector[J] c3; // third parameter of continuous mean function
  vector<lower=0>[J] k1; // first parameter of continuous noise function
  vector<lower=0>[J] k2; // second parameter of continuous noise function
  cholesky_factor_corr[J] L; // cholesky factor of correlation matrix 
}
transformed parameters{
  matrix[Nrow,Ncol] y;   // The "data" with interpolated missing values  y[ii_obs,J] = Y_C;
  // Fill y with non-missing values 
    for(n in 1:Ncomp) {
        y[ind_pres[n,1]][ind_pres[n,2]] = dat_complete[n];
    }
    // Fill the rest of y with missing value "parameters"
    for(n in 1:Nmiss){
        y[ind_miss[n,1]][ind_miss[n,2]] = ymiss[n];
    }
}
model{
  // Priors
  c1 ~ normal(0,1);
  c2 ~ normal(0,1);
  c3 ~ normal(0,1);
  k1 ~ cauchy(0,1);
  k2 ~ normal(0,1);
  L ~ lkj_corr_cholesky(4); // 

  // Mean and SD Functions for Likelihood
  matrix[Nrow,J] mu_J; // continuous
  matrix[Nrow,J] sd_J; // continuous
  for(i in 1:Nrow){
    for(j in 1:J){
      mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
      sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
    }
  }
  
  // Likelihood
  target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);
  
}
generated quantities{
  corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}

@spinkney After adjusting the prior on the cholesky factor, putting a vague prior on the missing values, and running 1000 warmup and 6000 iterations I have fit a bivariate copula (continuous margins only) with missing and observed data.

One last step/complication - incorporating the missing ordinal data…

functions{
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, N] G = mdivide_left_tri_low(L, U') ;
    return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
  }
  
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;

    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);

      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }

    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  
  matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
    int N = rows(mu_glm);
    int J = cols(mu_glm);
    matrix[N, J] rtn[2];
    // Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
    rtn[2] = rep_matrix(0, N, J);

    for (j in 1 : J) {
      rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
      rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
    }

    return rtn;
}
}
data{
  int J; // # of outcomes
  int<lower=0> Nrow;
  int<lower=0> Ncol;
  int<lower=0> Ncomp; // Number of non-missing values
  int<lower=0> Nmiss; // Number of missing values
  real dat_complete[Ncomp];   // Vector of non-missing values
  int ind_pres[Ncomp, 2];     // Matrix (row, col) of non-missing value indices
  int ind_miss[Nmiss, 2];// Matrix (row, col) of missing value indices
  real X[Nrow]; // predictor (age)
  }
transformed data{
  int order[J]; // order of responses for copula -> ordinal (binary) first
  for(i in 1:J){
    order[i] = i;
  }
}
parameters{
  real ymiss[Nmiss]; // missing data parameter - UNSURE
  
  vector<lower=0>[J] c1; // first parameter of continuous mean function
  vector<lower=0>[J] c2; // second parameter of continuous mean function
  vector[J] c3; // third parameter of continuous mean function
  vector<lower=0>[J] k1; // first parameter of continuous noise function
  vector<lower=0>[J] k2; // second parameter of continuous noise function
  cholesky_factor_corr[J] L; // cholesky factor of correlation matrix 
}
transformed parameters{
  matrix[Nrow,Ncol] y;   // The "data" with interpolated missing values
  // Fill y with non-missing values 
    for(n in 1:Ncomp) {
        y[ind_pres[n,1]][ind_pres[n,2]] = dat_complete[n];
    }
    // Fill the rest of y with missing value "parameters"
    for(n in 1:Nmiss){
        y[ind_miss[n,1]][ind_miss[n,2]] = ymiss[n];
    }
}
model{
  // Priors
  c1 ~ normal(0,1);
  c2 ~ normal(0,1);
  c3 ~ normal(0,1);
  k1 ~ cauchy(0,1);
  k2 ~ normal(0,1);
  L ~ lkj_corr_cholesky(1); //
  ymiss ~ normal(0,100);

  // Mean and SD Functions for Likelihood
  matrix[Nrow,J] mu_J; // continuous
  matrix[Nrow,J] sd_J; // continuous
  for(i in 1:Nrow){
    for(j in 1:J){
      mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
      sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
    }
  }
  
  // Likelihood
  target += centered_gaussian_copula_cholesky_({normal_marginal(y,mu_J,sd_J)},L,order);
  
}
generated quantities{
  corr_matrix[J] Omega = multiply_lower_tri_self_transpose(L);
}

1 Like

Why not put the prior on ymiss of normal(mu_j, sd_j)?

You know - I do not have an answer for why I forgot about that - I will adjust.

Do you have any suggestions on the discrete outcomes (either binary or polychotomous)?

Hello all!

Just want to bump this for some additional advice on the missing discrete component of this model.

I’d like to marginalize over all missing values so I can use all available data as opposed to complete cases only. I’ve gone through the manual’s chapters on both missing data and latent discrete parameters (i.e., change-point models), but am struggling how to translate to my use case.

My first stab was to use the knowledge from @bgoodri 's long ago musings on the tMVN (
tMVN.pdf (2.4 MB) that eventually led to example code for the multivariate probit. We know z = \phi^{-1}(u) where u is a standard uniform variate. Because the missing data is unbounded, I use the code from the pdf and set z[n,k] = inv_Phi(u[n,k]). Further, I set the jacobian transformation to 0 (ln(1)). Note, I’ve coded all missing data as 99. I’ve run this model and it fits, but I am very positive this is quite wrong and not what I want to do. I have found an example of missing data in a multivariate probit model at here (from @NikVetr github), but not sure how this may or may not relate. I know I need to sum over the possibilities of the missing data, just trying to figure out the most efficient means possible.

functions{
  real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
    int N = rows(U);
    int J = cols(U);
    matrix[J, N] G = mdivide_left_tri_low(L, U') ;
    return -N * sum(log(diagonal(L))) -  0.5 * ( sum( G^2 ) - sum(U^2) );
  }
  
  real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
    // Extract dimensions of final outcome matrix
    int N = rows(marginals[1][1]);
    int J = rows(L);
    matrix[N, J] U;

    // Iterate through marginal arrays, concatenating the outcome matrices by column
    // and aggregating the log-likelihoods (from continuous marginals) and jacobian
    // adjustments (from discrete marginals)
    real adj = 0;
    int pos = 1;
    for (m in 1 : size(marginals)) {
      int curr_cols = cols(marginals[m][1]);

      U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];

      adj += sum(marginals[m][2]);
      pos += curr_cols;
    }

    // Return the sum of the log-probability for copula outcomes and jacobian adjustments
    return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
  }
  
  matrix[] ordinal_marginal(array [,] int y, matrix mu_glm, real[,] u, array [] int n_thr, array [] vector tau, matrix L){
    int N = rows(mu_glm);
    int K = cols(mu_glm);
    matrix[N,K] rtn[2];
    
    for(n in 1:N){
      vector[K] z[N];
      real prev = 0;
      for(k in 1:K){
        if(n_thr[k] == 1){ //binary
          if(y[n,k] == 99){
            z[n,k] = inv_Phi(u[n,k]);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = 0;
          } else if(y[n,k] == 1){
            real bound;
            bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
            real t;
            t = bound + (1 - bound)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log1m(bound);
          } else{
            real bound;
            bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
            real t;
            t = bound*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(bound);
          }
        } else{ // polychotomous
          if(y[n,k] == 99){
            z[n,k] = inv_Phi(u[n,k]);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = 0;
          } else if(y[n,k] == 1){
            real t;
            real ub = Phi((tau[1+k-2,1] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = ub*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(ub);
          } else if (y[n,k] == n_thr[k] + 1){
            real t;
            real lb = Phi((tau[1+k-2,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = lb + (1 - lb)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log1m(lb);
          } else{
            real t;
            real lb = Phi((tau[1+k-2,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
            real ub = Phi((tau[1+k-2,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
            t = lb + (ub-lb)*u[n,k];
            z[n,k] = inv_Phi(t);
            rtn[1][n,k] = z[n,k];
            rtn[2][n,k] = log(ub-lb);
          }
        }
        if(k < K){
          prev = L[k+1, 1:k]*head(z[n],k);
        }
      }
    }
    
    return rtn;
  }
  
}
data{
  int<lower=1> N; // total sample size
  int<lower=1> K; // # of ordinal variables (ordinal marginal)
  int<lower=1> Thr[K]; # of thresholds for ordinal variables
  int Y[N,K]; // ordinal responses
  real X[N]; // predictor (age)
}
transformed data{
  int order[K]; // order of responses for copula -> ordinal (binary) first
  for(i in 1:K){
    order[i] = i;
  }
}
parameters{
  vector<lower=0>[K] o1; // first parameter of ordinal mean function
  ordered[Thr[3]]tau[2]; //cutpoints for ordinal model (polychtomous only)
  real<lower=0, upper=1> U[N,K]; //nuisance that absorbs inequality constraints
  cholesky_factor_corr[K] L; // cholesky factor of correlation matrix 
}
model{
  matrix[N,K] mu;
  for(n in 1:N){
    for(k in 1:K){
      mu[n,k] = X[n]^o1[k];
    }
  }
  
  // Priors
  o1 ~ normal(0,5);
  L ~ lkj_corr_cholesky(4); // 
  for(i in 1:2){
    for(t in 1:Thr[3]){
    tau[i,t] ~ normal(t+0.5,1); // cutpoint prior
    }
  }
  
  // Likelihood
  target += centered_gaussian_copula_cholesky_({ordinal_marginal(Y,mu,U,Thr,tau,L)},L,order);
  
}
generated quantities{
  corr_matrix[K] Omega = multiply_lower_tri_self_transpose(L);
}