Hi All!
Basic Question
How can I model missing data associated with a multivariate vector of outcomes in a mixed discrete-continuous Gaussian copula?
Specifics
First, I must thank both @spinkney and @andrjohns who helped here and @martinmodrak who helped here.
I have a multi-dimensional response vector of mixed discrete (binary and polychotomous) and continuous outcomes. I wish to learn the joint distribution of this series of traits and their dependence structure, particularly as it relates to a single predictor variable (x is a single real
value per observation representing age in years). Based on reading and previous posts here I turned towards a copula, which provides a way to decouple the joint into marginals and examine the dependency structure of the large (~60 dimensions) multivariate space. Based on @spinkney 's helpful work here, @andrjohns brms implementation here, and @martinmodrak 's advice here (that draws from @bgoodri 's multi-probit implementation here), I have fit a mixed ordinal and continuous gaussian copula. All diagnostics look good and the only adjustment made in sampling is max_treedepth to 13 and running 4 chains, 6000 iterations (1000 warmup). I share some of the pairs
here (C1 and C2 may cause troubles later?, but they are highly correlated given their relationship in the mean function…)
My main question is related to missing data. Right now I’ve fit the model to 5 traits (2 continuous, 1 binary, 2 polychotomous) with no missing cases - this is only n=195. I know to learn correlations we need LOTS of data and the total N is 1305, but the matrix is sparse in places with missing data across each data type. I have read through Stan’s chapters on missing data, but I am a little lost how to apply in the case of a copula. Essentially, I want to account for missing data across each trait in the model. For now I’ve got 5 traits, eventually I will have ~60 traits.
Let me know if there are any questions about the model! Thank you for any advice :)
functions{
matrix chol2inv2(matrix L) {
int K = rows(L);
matrix[K, K] K_identity = diag_matrix(rep_vector(1.0, K));
matrix[K, K] L_inv = mdivide_left_tri_low(L, K_identity);
matrix[K, K] rtn;
if (K == 0) {
return L;
}
if (K == 1) {
rtn[1, 1] = inv_square(L[1, 1]);
} else {
for (k in 1:K) {
rtn[k, k] = dot_self(tail(L_inv[ : , k], K + 1 - k));
for (j in (k + 1):K) {
int Kmj = K - j;
rtn[k, j] = dot_product(tail(L_inv[ : , k], Kmj + 1),
tail(L_inv[ : , j], Kmj + 1));
rtn[j, k] = rtn[k, j];
}
}
}
return rtn;
}
real multi_normal_cholesky_copula_lpdf(matrix U, matrix L) {
int N = rows(U);
int J = cols(U);
matrix[J, J] Gammainv = chol2inv2(L);
return -N * sum(log(diagonal(L))) - 0.5 * sum(add_diag(Gammainv, -1.0) .* crossprod(U));
}
real centered_gaussian_copula_cholesky_(matrix[,] marginals, matrix L, int[] order) {
// Extract dimensions of final outcome matrix
int N = rows(marginals[1][1]);
int J = rows(L);
matrix[N, J] U;
// Iterate through marginal arrays, concatenating the outcome matrices by column
// and aggregating the log-likelihoods (from continuous marginals) and jacobian
// adjustments (from discrete marginals)
real adj = 0;
int pos = 1;
for (m in 1 : size(marginals)) {
int curr_cols = cols(marginals[m][1]);
U[ : , pos : (pos + curr_cols - 1)] = marginals[m][1];
adj += sum(marginals[m][2]);
pos += curr_cols;
}
// Return the sum of the log-probability for copula outcomes and jacobian adjustments
return multi_normal_cholesky_copula_lpdf(U[ : , order] | L) + adj;
}
matrix[] normal_marginal(matrix y, matrix mu_glm, matrix sigma) {
int N = rows(mu_glm);
int J = cols(mu_glm);
matrix[N, J] rtn[2];
// Initialise the jacobian adjustments to zero, as vectorised lpdf will be used
rtn[2] = rep_matrix(0, N, J);
for (j in 1 : J) {
rtn[1][ : , j] = (y[ : , j] - mu_glm[ : , j]) ./ sigma[ :,j];
rtn[2][1, j] = normal_lpdf(y[ : , j] | mu_glm[ : , j], sigma[ :,j]);
}
return rtn;
}
matrix[] ordinal_marginal(array [,] int y, matrix mu_glm, real[,] u, array [] int n_thr, array [] vector tau, matrix L){
int N = rows(mu_glm);
int K = cols(mu_glm);
matrix[N,K] rtn[2];
for(n in 1:N){
vector[K] z[N];
real prev = 0;
for(k in 1:K){
if(n_thr[k] == 1){ //binary
real bound;
bound = Phi(-(mu_glm[n,k] + prev) / L[k,k]);
if(y[n,k] == 1){
real t;
t = bound + (1 - bound)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(bound);
} else{
real t;
t = bound*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(bound);
}
} else{ // polychotomous
real t;
if(y[n,k] == 1){
real ub = Phi((tau[1+k-2,1] - (mu_glm[n,k] + prev)) / L[k,k]);
t = ub*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub);
} else if (y[n,k] == n_thr[k] + 1){
real lb = Phi((tau[1+k-2,n_thr[k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (1 - lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log1m(lb);
} else{
real lb = Phi((tau[1+k-2,y[n,k] - 1] - (mu_glm[n,k] + prev)) / L[k,k]);
real ub = Phi((tau[1+k-2,y[n,k]] - (mu_glm[n,k] + prev)) / L[k,k]);
t = lb + (ub-lb)*u[n,k];
z[n,k] = inv_Phi(t);
rtn[1][n,k] = z[n,k];
rtn[2][n,k] = log(ub-lb);
}
}
if(k < K){
prev = L[k+1, 1:k]*head(z[n],k);
}
}
}
return rtn;
}
}
data{
int<lower=1> N; // total sample size
int<lower=1> J; // # of continuous variables (normal marginal)
int<lower=1> K; // # of ordinal variables (ordinal marginal)
int<lower=1> Thr[K]; # of thresholds for ordinal variables
matrix[N,J] Y_C; // continuous responses
int Y_O[N,K]; // ordinal responses
real X[N]; // predictor (age)
}
transformed data{
int order[K+J]; // order of responses for copula -> ordinal (binary) first
for(i in 1:K+J){
order[i] = i;
}
}
parameters{
vector<lower=0>[J] c1; // first parameter of continuous mean function
vector<lower=0>[J] c2; // second parameter of continuous mean function
vector[J] c3; // third parameter of continuous mean function
vector<lower=0>[J] k1; // first parameter of continuous noise function
vector<lower=0>[J] k2; // second parameter of continuous noise function
vector<lower=0>[K] o1; // first parameter of ordinal mean function
ordered[Thr[3]]tau[2]; //cutpoints for ordinal model (polychtomous only)
real<lower=0, upper=1> U[N,K]; //nuisance that absorbs inequality constraints
cholesky_factor_corr[K+J] L; // cholesky factor of correlation matrix
}
model{
// Priors
c1 ~ normal(0,1);
c2 ~ normal(0,1);
c3 ~ normal(0,1);
k1 ~ cauchy(0,1);
k2 ~ normal(0,1);
o1 ~ normal(0,5);
L ~ lkj_corr_cholesky(4); //
for(i in 1:2){
for(t in 1:Thr[3]){
tau[i,t] ~ normal(t+0.5,1); // cutpoint prior
}
}
// Mean and SD Functions for Likelihood
matrix[N,J] mu_J; // continuous
matrix[N,J] sd_J; // continuous
matrix[N,K] mu_K; // ordinal
for(i in 1:N){
for(j in 1:J){
mu_J[i,j] = c2[j]*X[i]^c1[j] + c3[j];
sd_J[i,j] = k1[j]*(1+k2[j]*X[i]);
}
for(k in 1:K){
mu_K[i,k] = X[i]^o1[k];
}
}
// Likelihood
target += centered_gaussian_copula_cholesky_({ordinal_marginal(Y_O,mu_K,U,Thr,tau,L), normal_marginal(Y_C,mu_J,sd_J)},L,order);
}
generated quantities{
corr_matrix[K+J] Omega = multiply_lower_tri_self_transpose(L);
}