I have found that the easiest approach for copulas (by far) is to implement the variable augmentation approach of Smith and Khaled (2012).
Specifically, you can write the complete data density as
f(y_c, y_d, u) = c(u | \Gamma) \left[ \prod_{j \in \mathcal{C}} f_j(y_j) \right] \left[ \prod_{j \in \mathcal{D}} I( F_j(y_j-1) \le u_{j} \le F_j(y_j) ) \right],
where c(\cdot) is the copula density function, \mathcal{C} contains the indices of the continuous margins, \mathcal{D} contains the indices of the discrete margins, and
u_{j} = F_j(y_j) \text{ if } j \in \mathcal{C}.
Note that the above density implies
U_j \sim U( F_j(y_j - 1), F_j(y_j) ), \ \ j \in \mathcal{D}
so your latent U_j's will have varying bounds.
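For example, with a Bernoulli margin with success probability p, the bounds work out to (0, 1 - p) when y_j = 0 and (1 - p, 1) when y_j = 1. A quick R sketch (p = 0.3 is just an illustrative value):

p <- 0.3
F <- function(y) pbinom(y, size = 1, prob = p)  # Bernoulli CDF
rbind(bounds_y0 = c(F(-1), F(0)),  # (0.0, 0.7)
      bounds_y1 = c(F(0),  F(1)))  # (0.7, 1.0)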
For the Gaussian copula, we have
c(u | \Gamma) = |\Gamma|^{-1/2} \exp\left\{ -\frac{1}{2} q(u)'[ \Gamma^{-1} - I ] q(u) \right\}
where q(u) = (q_1, \ldots, q_J)' = ( \Phi^{-1}(u_1), \ldots, \Phi^{-1}(u_J ) )'.
If the j^{th} margin is Gaussian, then u_j = \Phi((y_j - \mu_j) / \sigma_j) is not really latent and we have q_j = \Phi^{-1}[ \Phi( (y_j - \mu_j) / \sigma_j ) ] = (y_j - \mu_j) / \sigma_j.
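As a sanity check on the form above, note that the Gaussian copula density is the ratio of the joint normal density of q(u) to the product of its standard normal marginals. A short R sketch confirms the closed form numerically (Rho and u here are arbitrary test values):

library(mvtnorm)
Rho <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
u <- c(0.2, 0.8)
q <- qnorm(u)
## density-ratio definition of the copula density
log_c_ratio <- dmvnorm(q, sigma = Rho, log = TRUE) - sum(dnorm(q, log = TRUE))
## closed form: -0.5 * log|Rho| - 0.5 * q'(Rho^{-1} - I)q
log_c_form <- -0.5 * log(det(Rho)) - 0.5 * drop(t(q) %*% (solve(Rho) - diag(2)) %*% q)
all.equal(log_c_ratio, log_c_form)  # TRUE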
The following Stan code appears to reasonably recover the truth for simulated data, and it should be relatively straightforward to extend it to a regression setting. I wrote the copula density function in terms of q(u) rather than u itself since, as noted above, writing it in terms of u would result in unnecessary calculations for the normal margins.
functions {
  // log density of a Gaussian copula evaluated at Q = Phi^{-1}(U), given the
  // Cholesky factor L of the copula correlation matrix Rho
  real gaussian_copula_mvn_chol_lpdf(matrix Q, matrix L) {
    int n = rows(Q);
    int K = cols(Q);
    matrix[K,K] Rhoinv = chol2inv(L);  // Rho^{-1} from its Cholesky factor
    // elementwise product, so that sum(M) = tr[ (Rho^{-1} - I) Q'Q ]
    matrix[K,K] M = add_diag(Rhoinv, -1.) .* crossprod(Q);
    // -n/2 * log|Rho| = -n * sum(log(diag(L)))
    return -n * sum(log(diagonal(L))) - 0.5 * sum(M);
  }
}
data {
  int<lower=0> n;   // sample size
  int<lower=0> Jn;  // number of normal margins
  int<lower=0> Jb;  // number of binary margins
  matrix[n, Jn] yn; // normal random variates
  array[n, Jb] int<lower=0,upper=1> yb; // binary random variates
}
transformed data {
  int J = Jn + Jb;
  matrix[n,Jb] ybmat = to_matrix(yb);
}
parameters {
  cholesky_factor_corr[J] L;           // Cholesky factor of copula correlation
  vector[Jn] mu;                       // means of the normal margins
  vector<lower=0>[Jn] sigma;           // SDs of the normal margins
  vector<lower=0,upper=1>[Jb] p;       // success probabilities of the binary margins
  matrix<lower=0,upper=1>[n,Jb] u_raw; // latent uniforms before rescaling to (Lb, Ub)
}
model {
  //---------------------------
  // Temporary variables
  //---------------------------
  matrix[n,J] Q; // partially latent Q matrix (Q[, j] = Phi_inv(U[, j]))
  vector[n] Lb;  // lower bound for uniform variates
  vector[n] Ub;  // upper bound for uniform variates
  vector[n] Db;  // = Ub - Lb (useful for Jacobian)
  int k = 0;     // column counter for constructing Q matrix
  //---------------------------
  // Priors
  //---------------------------
  L ~ lkj_corr_cholesky(1.0);
  mu ~ normal(0, 10);
  sigma ~ normal(0, 10);
  //---------------------------
  // Complete data likelihood
  //---------------------------
  // Normal outcomes: u_j = Phi((y_j - mu_j) / sigma_j) is observed,
  // so q_j is just the standardized residual
  for ( j in 1:Jn ) {
    k += 1;
    yn[, j] ~ normal(mu[j], sigma[j]);
    Q[, k] = inv(sigma[j]) * (yn[, j] - mu[j]); // = Phi_inv(u_j), standard normal
  }
  // Binary outcomes: U_j ~ U(F_j(y_j - 1), F_j(y_j))
  for ( j in 1:Jb ) {
    k += 1;
    Lb = ybmat[, j] * (1 - p[j]);     // F_j(y - 1): 0 if y = 0, 1 - p if y = 1
    Ub = 1 - (1 - ybmat[, j]) * p[j]; // F_j(y): 1 - p if y = 0, 1 if y = 1
    Db = Ub - Lb;
    target += log(Db);                // Jacobian of u = Lb + Db * u_raw
    Q[, k] = inv_Phi(Lb + Db .* u_raw[, j]);
  }
  // Copula density
  Q ~ gaussian_copula_mvn_chol(L);
}
generated quantities {
  matrix[J,J] Corr = multiply_lower_tri_self_transpose(L);
}
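To convince myself the lpdf is right, the same quantity can be computed in R from the density-ratio definition (row-wise log multivariate normal density minus the log standard normal marginals). This sketch mirrors the Stan function with arbitrary test inputs (K, n, and Rho are made up):

library(mvtnorm)
set.seed(123)
K <- 3; n <- 10
Rho <- diag(0.6, K) + matrix(0.4, K, K) # test correlation matrix
Lch <- t(chol(Rho))                     # lower-triangular Cholesky factor
Q <- rmvnorm(n, sigma = Rho)
## the Stan function, line for line
lp_stan <- -n * sum(log(diag(Lch))) -
  0.5 * sum((solve(Rho) - diag(K)) * crossprod(Q))
## density-ratio definition, summed over rows
lp_ratio <- sum(dmvnorm(Q, sigma = Rho, log = TRUE)) - sum(dnorm(Q, log = TRUE))
all.equal(lp_stan, lp_ratio)  # TRUE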
To simulate copula random variates in R and test the approach, I have the following:
library(cmdstanr)
library(magrittr) # provides the pipe used below

## Compile the Stan program above (the file name here is illustrative)
model <- cmdstan_model("gaussian_copula.stan")

## Generate copula random variates
n <- 500
rho <- 0.5
Jn <- 2
Jb <- 2
mu <- rep(1, Jn)
sigma <- rep(1, Jn)
p <- rep(0.3, Jb)
J <- Jn + Jb
Rho <- diag(1 - rho, J) + matrix(rho, J, J)
Q <- mvtnorm::rmvnorm(n, sigma = Rho)
U <- pnorm(Q)
Yn <- matrix(nrow = n, ncol = Jn)
Yb <- matrix(nrow = n, ncol = Jb)
k <- 0
for ( j in 1:Jn ) {
  k <- k + 1
  Yn[, j] <- qnorm(U[, k], mean = mu[j], sd = sigma[j])
}
for ( j in 1:Jb ) {
  k <- k + 1
  Yb[, j] <- qbinom(U[, k], size = 1, prob = p[j])
}
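Before fitting, a quick (purely illustrative) sanity check that the simulated data behave as intended:

colMeans(Yn)     # ~ mu = (1, 1)
apply(Yn, 2, sd) # ~ sigma = (1, 1)
colMeans(Yb)     # ~ p = (0.3, 0.3)
round(cor(Q), 2) # ~ Rho (0.5 off the diagonal)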
standata <- list(
  n = n, Jn = Jn, Jb = Jb, yn = Yn, yb = Yb
)
fit <- model$sample(
  data = standata
  , iter_warmup = 1000, iter_sampling = 1000
  , chains = 9, parallel_chains = 9
)
fit$summary(variables = c('mu', 'sigma', 'p', 'Corr')) %>% print(n = 200)
This results in posterior samples that reasonably recover the truth (mu = 1, sigma = 1, p = 0.3, all pairwise correlations 0.5):
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 mu[1] 0.947 0.947 0.0451 0.0461 0.874 1.02 1.00 6727. 6624.
2 mu[2] 0.898 0.899 0.0458 0.0466 0.823 0.974 1.00 6456. 6927.
3 sigma[1] 1.01 1.01 0.0316 0.0313 0.956 1.06 1.00 7380. 6586.
4 sigma[2] 1.03 1.03 0.0321 0.0325 0.980 1.09 1.00 7085. 6824.
5 p[1] 0.259 0.259 0.0192 0.0194 0.228 0.291 1.00 8620. 6578.
6 p[2] 0.297 0.296 0.0203 0.0202 0.264 0.331 1.00 8743. 6519.
7 Corr[1,1] 1 1 0 0 1 1 NA NA NA
8 Corr[2,1] 0.541 0.542 0.0317 0.0323 0.489 0.592 1.00 7311. 6623.
9 Corr[3,1] 0.468 0.470 0.0497 0.0493 0.384 0.548 1.00 3634. 5343.
10 Corr[4,1] 0.449 0.451 0.0498 0.0498 0.364 0.527 1.00 4239. 5511.
11 Corr[1,2] 0.541 0.542 0.0317 0.0323 0.489 0.592 1.00 7311. 6623.
12 Corr[2,2] 1 1 0 0 1 1 NA NA NA
13 Corr[3,2] 0.479 0.480 0.0485 0.0485 0.397 0.556 1.00 3478. 4987.
14 Corr[4,2] 0.459 0.460 0.0490 0.0484 0.377 0.538 1.00 4263. 5900.
15 Corr[1,3] 0.468 0.470 0.0497 0.0493 0.384 0.548 1.00 3634. 5343.
16 Corr[2,3] 0.479 0.480 0.0485 0.0485 0.397 0.556 1.00 3478. 4987.
17 Corr[3,3] 1 1 0 0 1 1 NA NA NA
18 Corr[4,3] 0.373 0.375 0.0694 0.0701 0.255 0.485 1.01 2242. 3869.
19 Corr[1,4] 0.449 0.451 0.0498 0.0498 0.364 0.527 1.00 4239. 5511.
20 Corr[2,4] 0.459 0.460 0.0490 0.0484 0.377 0.538 1.00 4263. 5900.
21 Corr[3,4] 0.373 0.375 0.0694 0.0701 0.255 0.485 1.01 2242. 3869.
22 Corr[4,4] 1 1 0 0 1 1 NA NA NA